commit 775870a7baedc48f8c3817582e9ae01ae2e4075f Author: James Antill Date: Mon Aug 8 14:11:01 2022 -0400 Import rpm: 432cb78217266d84b03f8eeefce139f91808411d diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f5dae2e --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +SOURCES/qemu-6.2.0.tar.xz diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch new file mode 100644 index 0000000..43fbac3 --- /dev/null +++ b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -0,0 +1,17931 @@ +From 0a17d5f6abf800e88069738904e3fcd8427ab28a Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 5 Aug 2021 01:07:55 -0400 +Subject: redhat: Adding slirp to the exploded tree + +RH-Author: Danilo de Paula +Message-id: <20190907020756.8619-1-ddepaula@redhat.com> +Patchwork-id: 90309 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] redhat: Adding slirp to the exploded tree +Bugzilla: +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Wainer dos Santos Moschetta + +Until qemu-kvm-3.1 slirp used to live as a regular folder in qemu-kvm. +After that it got moved into its own submodule. Which means it's not +part of the qemu-kvm git tree anymore. + +This passed unnoticed for RHEL-AV-8.0.1 and 8.1.0 because qemu still ships +the code in the tarball. That's why scratch builds still work (they are based on +the tarball content). + +As we're receiving some CVEs against slirp, we need a way to patch +slirp in RHEL-8.1.0 without handling as a separate package (as we do for +firmwares). + +The simplest solution is to copy the slirp folder from the tarball into the +exploded tree. + +To be able to do that, I had to make some changes: + +slirp needs to be removed from .gitmodules, otherwise git complains +about files on it. 
+ +Since "make -C redhat rh-brew" uses the tarball and applies all the +patches on top of it, we need to remove the folder from the tarball before applying +the patch (because we are actually re-applying them). + +We also need to use --ignore-submodules while generating the patches for +scratch-build, otherwise it will include some weird definition of the +slirp folder in the patch, something that /usr/bin/patch gets mad with. + +After that I compared the patch list, after and before this change, and +saw no major differences. + +This is an exploded-tree-only change and shouldn't be applied to dist-git. + +Signed-off-by: Danilo C. L. de Paula + +Rebase notes (weekly-210217): + - Upstream slirp updated to 8f43a99191afb47ca3f3c6972f6306209f367ece + +Rebase notes (6.1.0-rc2): +- Upstream slirp updated to a88d9ace234a24ce1c17189642ef9104799425e0 + +Merged commits (weekly-210203): + - a3f5f082f Drop bogus IPv6 messages + +Merged commits (weekly-210714): +- ce9ddeef04 Add mtod_check() +- 0609398e76 bootp: limit vendor-specific area to input packet memory buffer +- 377f755273 bootp: check bootp_input buffer size +- 4101e41f0d upd6: check udp6_input buffer size +- 7a663c9667 tftp: check tftp_input buffer size +- 76f81fc22c tftp: introduce a header structure +- 6903e9ba25 udp: check upd_input buffer size +- 8aa4fe0b6d Fix "DHCP broken in libslirp v4.6.0" +--- + .gitmodules | 3 - + slirp/.clang-format | 58 ++ + slirp/.gitignore | 11 + + slirp/.gitlab-ci.yml | 43 + + slirp/.gitpublish | 3 + + slirp/CHANGELOG.md | 184 ++++ + slirp/COPYRIGHT | 62 ++ + slirp/README.md | 60 ++ + slirp/build-aux/git-version-gen | 158 ++++ + slirp/meson.build | 162 ++++ + slirp/meson_options.txt | 2 + + slirp/src/arp_table.c | 94 ++ + slirp/src/bootp.c | 375 ++++++++ + slirp/src/bootp.h | 129 +++ + slirp/src/cksum.c | 179 ++++ + slirp/src/debug.h | 59 ++ + slirp/src/dhcpv6.c | 224 +++++ + slirp/src/dhcpv6.h | 68 ++ + slirp/src/dnssearch.c | 306 ++++++ + slirp/src/if.c | 215 +++++ + slirp/src/if.h | 
25 + + slirp/src/ip.h | 242 +++++ + slirp/src/ip6.h | 214 +++++ + slirp/src/ip6_icmp.c | 444 +++++++++ + slirp/src/ip6_icmp.h | 220 +++++ + slirp/src/ip6_input.c | 88 ++ + slirp/src/ip6_output.c | 45 + + slirp/src/ip_icmp.c | 524 +++++++++++ + slirp/src/ip_icmp.h | 168 ++++ + slirp/src/ip_input.c | 463 +++++++++ + slirp/src/ip_output.c | 171 ++++ + slirp/src/libslirp-version.h.in | 24 + + slirp/src/libslirp.h | 236 +++++ + slirp/src/libslirp.map | 36 + + slirp/src/main.h | 16 + + slirp/src/mbuf.c | 281 ++++++ + slirp/src/mbuf.h | 192 ++++ + slirp/src/misc.c | 440 +++++++++ + slirp/src/misc.h | 72 ++ + slirp/src/ncsi-pkt.h | 445 +++++++++ + slirp/src/ncsi.c | 197 ++++ + slirp/src/ndp_table.c | 98 ++ + slirp/src/sbuf.c | 168 ++++ + slirp/src/sbuf.h | 27 + + slirp/src/slirp.c | 1387 +++++++++++++++++++++++++++ + slirp/src/slirp.h | 289 ++++++ + slirp/src/socket.c | 1104 ++++++++++++++++++++++ + slirp/src/socket.h | 186 ++++ + slirp/src/state.c | 379 ++++++++ + slirp/src/stream.c | 120 +++ + slirp/src/stream.h | 35 + + slirp/src/tcp.h | 169 ++++ + slirp/src/tcp_input.c | 1552 +++++++++++++++++++++++++++++++ + slirp/src/tcp_output.c | 516 ++++++++++ + slirp/src/tcp_subr.c | 1011 ++++++++++++++++++++ + slirp/src/tcp_timer.c | 286 ++++++ + slirp/src/tcp_timer.h | 130 +++ + slirp/src/tcp_var.h | 161 ++++ + slirp/src/tcpip.h | 104 +++ + slirp/src/tftp.c | 470 ++++++++++ + slirp/src/tftp.h | 58 ++ + slirp/src/udp.c | 425 +++++++++ + slirp/src/udp.h | 96 ++ + slirp/src/udp6.c | 196 ++++ + slirp/src/util.c | 441 +++++++++ + slirp/src/util.h | 203 ++++ + slirp/src/version.c | 8 + + slirp/src/vmstate.c | 444 +++++++++ + slirp/src/vmstate.h | 391 ++++++++ + 69 files changed, 17389 insertions(+), 3 deletions(-) + create mode 100644 slirp/.clang-format + create mode 100644 slirp/.gitignore + create mode 100644 slirp/.gitlab-ci.yml + create mode 100644 slirp/.gitpublish + create mode 100644 slirp/CHANGELOG.md + create mode 100644 slirp/COPYRIGHT + create mode 100644 slirp/README.md 
+ create mode 100755 slirp/build-aux/git-version-gen + create mode 100644 slirp/meson.build + create mode 100644 slirp/meson_options.txt + create mode 100644 slirp/src/arp_table.c + create mode 100644 slirp/src/bootp.c + create mode 100644 slirp/src/bootp.h + create mode 100644 slirp/src/cksum.c + create mode 100644 slirp/src/debug.h + create mode 100644 slirp/src/dhcpv6.c + create mode 100644 slirp/src/dhcpv6.h + create mode 100644 slirp/src/dnssearch.c + create mode 100644 slirp/src/if.c + create mode 100644 slirp/src/if.h + create mode 100644 slirp/src/ip.h + create mode 100644 slirp/src/ip6.h + create mode 100644 slirp/src/ip6_icmp.c + create mode 100644 slirp/src/ip6_icmp.h + create mode 100644 slirp/src/ip6_input.c + create mode 100644 slirp/src/ip6_output.c + create mode 100644 slirp/src/ip_icmp.c + create mode 100644 slirp/src/ip_icmp.h + create mode 100644 slirp/src/ip_input.c + create mode 100644 slirp/src/ip_output.c + create mode 100644 slirp/src/libslirp-version.h.in + create mode 100644 slirp/src/libslirp.h + create mode 100644 slirp/src/libslirp.map + create mode 100644 slirp/src/main.h + create mode 100644 slirp/src/mbuf.c + create mode 100644 slirp/src/mbuf.h + create mode 100644 slirp/src/misc.c + create mode 100644 slirp/src/misc.h + create mode 100644 slirp/src/ncsi-pkt.h + create mode 100644 slirp/src/ncsi.c + create mode 100644 slirp/src/ndp_table.c + create mode 100644 slirp/src/sbuf.c + create mode 100644 slirp/src/sbuf.h + create mode 100644 slirp/src/slirp.c + create mode 100644 slirp/src/slirp.h + create mode 100644 slirp/src/socket.c + create mode 100644 slirp/src/socket.h + create mode 100644 slirp/src/state.c + create mode 100644 slirp/src/stream.c + create mode 100644 slirp/src/stream.h + create mode 100644 slirp/src/tcp.h + create mode 100644 slirp/src/tcp_input.c + create mode 100644 slirp/src/tcp_output.c + create mode 100644 slirp/src/tcp_subr.c + create mode 100644 slirp/src/tcp_timer.c + create mode 100644 slirp/src/tcp_timer.h 
+ create mode 100644 slirp/src/tcp_var.h + create mode 100644 slirp/src/tcpip.h + create mode 100644 slirp/src/tftp.c + create mode 100644 slirp/src/tftp.h + create mode 100644 slirp/src/udp.c + create mode 100644 slirp/src/udp.h + create mode 100644 slirp/src/udp6.c + create mode 100644 slirp/src/util.c + create mode 100644 slirp/src/util.h + create mode 100644 slirp/src/version.c + create mode 100644 slirp/src/vmstate.c + create mode 100644 slirp/src/vmstate.h + +diff --git a/slirp/.clang-format b/slirp/.clang-format +new file mode 100644 +index 0000000000..17fb49fe65 +--- /dev/null ++++ b/slirp/.clang-format +@@ -0,0 +1,58 @@ ++# https://clang.llvm.org/docs/ClangFormat.html ++# https://clang.llvm.org/docs/ClangFormatStyleOptions.html ++--- ++Language: Cpp ++AlignAfterOpenBracket: Align ++AlignConsecutiveAssignments: false # although we like it, it creates churn ++AlignConsecutiveDeclarations: false ++AlignEscapedNewlinesLeft: true ++AlignOperands: true ++AlignTrailingComments: false # churn ++AllowAllParametersOfDeclarationOnNextLine: true ++AllowShortBlocksOnASingleLine: false ++AllowShortCaseLabelsOnASingleLine: false ++AllowShortFunctionsOnASingleLine: None ++AllowShortIfStatementsOnASingleLine: false ++AllowShortLoopsOnASingleLine: false ++AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account ++AlwaysBreakBeforeMultilineStrings: false ++BinPackArguments: true ++BinPackParameters: true ++BraceWrapping: ++ AfterControlStatement: false ++ AfterEnum: false ++ AfterFunction: true ++ AfterStruct: false ++ AfterUnion: false ++ BeforeElse: false ++ IndentBraces: false ++BreakBeforeBinaryOperators: None ++BreakBeforeBraces: Custom ++BreakBeforeTernaryOperators: false ++BreakStringLiterals: true ++ColumnLimit: 80 ++ContinuationIndentWidth: 4 ++Cpp11BracedListStyle: false ++DerivePointerAlignment: false ++DisableFormat: false ++IndentCaseLabels: false ++IndentWidth: 4 ++IndentWrappedFunctionNames: false 
++KeepEmptyLinesAtTheStartOfBlocks: false ++MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? ++MacroBlockEnd: '.*_END$' ++MaxEmptyLinesToKeep: 2 ++PointerAlignment: Right ++ReflowComments: true ++SortIncludes: false ++SpaceAfterCStyleCast: false ++SpaceBeforeAssignmentOperators: true ++SpaceBeforeParens: ControlStatements ++SpaceInEmptyParentheses: false ++SpacesBeforeTrailingComments: 1 ++SpacesInContainerLiterals: true ++SpacesInParentheses: false ++SpacesInSquareBrackets: false ++Standard: Auto ++UseTab: Never ++... +diff --git a/slirp/CHANGELOG.md b/slirp/CHANGELOG.md +new file mode 100644 +index 0000000000..bd4845ca29 +--- /dev/null ++++ b/slirp/CHANGELOG.md +@@ -0,0 +1,184 @@ ++# Changelog ++ ++All notable changes to this project will be documented in this file. ++ ++The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ++and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ++ ++## [4.6.1] - 2021-06-18 ++ ++### Fixed ++ ++ - Fix DHCP regression introduced in 4.6.0. !95 ++ ++## [4.6.0] - 2021-06-14 ++ ++### Added ++ ++ - mbuf: Add debugging helpers for allocation. !90 ++ ++### Changed ++ ++ - Revert "Set macOS deployment target to macOS 10.4". !93 ++ ++### Fixed ++ ++ - mtod()-related buffer overflows (CVE-2021-3592 #44, CVE-2021-3593 #45, ++ CVE-2021-3594 #47, CVE-2021-3595 #46). ++ - poll_fd: add missing fd registration for UDP and ICMP ++ - ncsi: make ncsi_calculate_checksum work with unaligned data. !89 ++ - Various typos and doc fixes. !88 ++ ++## [4.5.0] - 2021-05-18 ++ ++### Added ++ ++ - IPv6 forwarding. !62 !75 !77 ++ - slirp_neighbor_info() to dump the ARP/NDP tables. !71 ++ ++### Changed ++ ++ - Lazy guest address resolution for IPv6. !81 ++ - Improve signal handling when spawning a child. !61 ++ - Set macOS deployment target to macOS 10.4. !72 ++ - slirp_add_hostfwd: Ensure all error paths set errno. !80 ++ - More API documentation. 
++ ++### Fixed ++ ++ - Assertion failure on unspecified IPv6 address. !86 ++ - Disable polling for PRI on MacOS, fixing some closing streams issues. !73 ++ - Various memory leak fixes on fastq/batchq. !68 ++ - Memory leak on IPv6 fast-send. !67 ++ - Slow socket response on Windows. !64 ++ - Misc build and code cleanups. !60 !63 !76 !79 !84 ++ ++## [4.4.0] - 2020-12-02 ++ ++### Added ++ ++ - udp, udp6, icmp: handle TTL value. !48 ++ - Enable forwarding ICMP errors. !49 ++ - Add DNS resolving for iOS. !54 ++ ++### Changed ++ ++ - Improve meson subproject() support. !53 ++ - Removed Makefile-based build system. !56 ++ ++### Fixed ++ ++ - socket: consume empty packets. !55 ++ - check pkt_len before reading protocol header (CVE-2020-29129). !57 ++ - ip_stripoptions use memmove (fixes undefined behaviour). !47 ++ - various Coverity-related changes/fixes. ++ ++## [4.3.1] - 2020-07-08 ++ ++### Changed ++ ++ - A silent truncation could occur in `slirp_fmt()`, which will now print a ++ critical message. See also #22. ++ ++### Fixed ++ ++ - CVE-2020-10756 - Drop bogus IPv6 messages that could lead to data leakage. ++ See !44 and !42. ++ - Fix win32 builds by using the SLIRP_PACKED definition. ++ - Various coverity scan errors fixed. !41 ++ - Fix new GCC warnings. !43 ++ ++## [4.3.0] - 2020-04-22 ++ ++### Added ++ ++ - `SLIRP_VERSION_STRING` macro, with the git sha suffix when building from git ++ - `SlirpConfig.disable_dns`, to disable DNS redirection #16 ++ ++### Changed ++ ++ - `slirp_version_string()` now has the git sha suffix when building form git ++ - Limit DNS redirection to port 53 #16 ++ ++### Fixed ++ ++ - Fix build regression with mingw & NetBSD ++ - Fix use-afte-free in `ip_reass()` (CVE-2020-1983) ++ ++## [4.2.0] - 2020-03-17 ++ ++### Added ++ ++ - New API function `slirp_add_unix`: add a forward rule to a Unix socket. 
++ - New API function `slirp_remove_guestfwd`: remove a forward rule previously ++ added by `slirp_add_exec`, `slirp_add_unix` or `slirp_add_guestfwd` ++ - New `SlirpConfig.outbound_addr{,6}` fields to bind output socket to a ++ specific address ++ ++### Changed ++ ++ - socket: do not fallback on host loopback if `get_dns_addr()` failed ++ or the address is in slirp network ++ ++### Fixed ++ ++ - ncsi: fix checksum OOB memory access ++ - `tcp_emu()`: fix OOB accesses ++ - tftp: restrict relative path access ++ - state: fix loading of guestfwd state ++ ++## [4.1.0] - 2019-12-02 ++ ++### Added ++ ++ - The `slirp_new()` API, simpler and more extensible than `slirp_init()`. ++ - Allow custom MTU configuration. ++ - Option to disable host loopback connections. ++ - CI now runs scan-build too. ++ ++### Changed ++ ++ - Disable `tcp_emu()` by default. `tcp_emu()` is known to have caused ++ several CVEs, and not useful today in most cases. The feature can ++ be still enabled by setting `SlirpConfig.enable_emu` to true. ++ - meson build system is now `subproject()` friendly. ++ - Replace remaining `malloc()`/`free()` with glib (which aborts on OOM) ++ - Various code cleanups. ++ ++### Deprecated ++ ++ - The `slirp_init()` API. ++ ++### Fixed ++ ++ - `getpeername()` error after `shutdown(SHUT_WR)`. ++ - Exec forward: correctly parse command lines that contain spaces. ++ - Allow 0.0.0.0 destination address. ++ - Make host receive broadcast packets. ++ - Various memory related fixes (heap overflow, leaks, NULL ++ dereference). ++ - Compilation warnings, dead code. ++ ++## [4.0.0] - 2019-05-24 ++ ++### Added ++ ++ - Installable as a shared library. ++ - meson build system ++ (& make build system for in-tree QEMU integration) ++ ++### Changed ++ ++ - Standalone project, removing any QEMU dependency. ++ - License clarifications. 
++ ++[Unreleased]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.6.1...master ++[4.6.1]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.6.0...v4.6.1 ++[4.6.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.5.0...v4.6.0 ++[4.5.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.4.0...v4.5.0 ++[4.4.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.3.1...v4.4.0 ++[4.3.1]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.3.0...v4.3.1 ++[4.3.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.2.0...v4.3.0 ++[4.2.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.1.0...v4.2.0 ++[4.1.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.0.0...v4.1.0 ++[4.0.0]: https://gitlab.freedesktop.org/slirp/libslirp/commits/v4.0.0 +diff --git a/slirp/COPYRIGHT b/slirp/COPYRIGHT +new file mode 100644 +index 0000000000..ed49512dbc +--- /dev/null ++++ b/slirp/COPYRIGHT +@@ -0,0 +1,62 @@ ++Slirp was written by Danny Gasparovski. ++Copyright (c), 1995,1996 All Rights Reserved. ++ ++Slirp is free software; "free" as in you don't have to pay for it, and you ++are free to do whatever you want with it. I do not accept any donations, ++monetary or otherwise, for Slirp. Instead, I would ask you to pass this ++potential donation to your favorite charity. In fact, I encourage ++*everyone* who finds Slirp useful to make a small donation to their ++favorite charity (for example, GreenPeace). This is not a requirement, but ++a suggestion from someone who highly values the service they provide. ++ ++The copyright terms and conditions: ++ ++---BEGIN--- ++ ++ Copyright (c) 1995,1996 Danny Gasparovski. All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ 1. Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ 2. 
Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ 3. Neither the name of the copyright holder nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, ++ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY ++ AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ++ DANNY GASPAROVSKI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, ++ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++---END--- ++ ++This basically means you can do anything you want with the software, except ++1) call it your own, and 2) claim warranty on it. There is no warranty for ++this software. None. Nada. If you lose a million dollars while using ++Slirp, that's your loss not mine. So, ***USE AT YOUR OWN RISK!***. ++ ++If these conditions cannot be met due to legal restrictions (E.g. where it ++is against the law to give out Software without warranty), you must cease ++using the software and delete all copies you have. ++ ++Slirp uses code that is copyrighted by the following people/organizations: ++ ++Juha Pirkola. ++Gregory M. Christy. ++The Regents of the University of California. ++Carnegie Mellon University. ++The Australian National University. ++RSA Data Security, Inc. 
++ ++Please read the top of each source file for the details on the various ++copyrights. +diff --git a/slirp/README.md b/slirp/README.md +new file mode 100644 +index 0000000000..9f9c1b14f6 +--- /dev/null ++++ b/slirp/README.md +@@ -0,0 +1,60 @@ ++# libslirp ++ ++libslirp is a user-mode networking library used by virtual machines, ++containers or various tools. ++ ++## Getting Started ++ ++### Prerequisites ++ ++A C compiler, meson and glib2 development libraries. ++ ++(see also [.gitlab-ci.yml](.gitlab-ci.yml) DEPS variable for the list ++of dependencies on Fedora) ++ ++### Building ++ ++You may build and install the shared library with meson: ++ ++``` sh ++meson build ++ninja -C build install ++``` ++And configure QEMU with --enable-slirp=system to link against it. ++ ++(QEMU may build with the submodule static library using --enable-slirp=git) ++ ++### Testing ++ ++Unfortunately, there are no automated tests available. ++ ++You may run QEMU ``-net user`` linked with your development version. ++ ++## Contributing ++ ++Feel free to open issues on the [project ++issues](https://gitlab.freedesktop.org/slirp/libslirp/issues) page. ++ ++You may clone the [gitlab ++project](https://gitlab.freedesktop.org/slirp/libslirp) and create a ++merge request. ++ ++Contributing with gitlab allows gitlab workflow, tracking issues, ++running CI etc. ++ ++Alternatively, you may send patches to slirp@lists.freedesktop.org ++mailing list. ++ ++## Versioning ++ ++We intend to use [libtool's ++versioning](https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html) ++for the shared libraries and use [SemVer](http://semver.org/) for ++project versions. ++ ++For the versions available, see the [tags on this ++repository](https://gitlab.freedesktop.org/slirp/libslirp/releases). ++ ++## License ++ ++See the [COPYRIGHT](COPYRIGHT) file for details. 
+diff --git a/slirp/build-aux/git-version-gen b/slirp/build-aux/git-version-gen +new file mode 100755 +index 0000000000..5617eb8d4e +--- /dev/null ++++ b/slirp/build-aux/git-version-gen +@@ -0,0 +1,158 @@ ++#!/bin/sh ++# Print a version string. ++scriptversion=2010-06-14.19; # UTC ++ ++# Copyright (C) 2007-2010 Free Software Foundation, Inc. ++# ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see <http://www.gnu.org/licenses/>. ++ ++# This script is derived from GIT-VERSION-GEN from GIT: http://git.or.cz/. ++# It may be run two ways: ++# - from a git repository in which the "git describe" command below ++# produces useful output (thus requiring at least one signed tag) ++# - from a non-git-repo directory containing a .tarball-version file, which ++# presumes this script is invoked like "./git-version-gen .tarball-version". ++ ++# In order to use intra-version strings in your project, you will need two ++# separate generated version string files: ++# ++# .tarball-version - present only in a distribution tarball, and not in ++# a checked-out repository. Created with contents that were learned at ++# the last time autoconf was run, and used by git-version-gen. Must not ++# be present in either $(srcdir) or $(builddir) for git-version-gen to ++# give accurate answers during normal development with a checked out tree, ++# but must be present in a tarball when there is no version control system. 
++# Therefore, it cannot be used in any dependencies. GNUmakefile has ++# hooks to force a reconfigure at distribution time to get the value ++# correct, without penalizing normal development with extra reconfigures. ++# ++# .version - present in a checked-out repository and in a distribution ++# tarball. Usable in dependencies, particularly for files that don't ++# want to depend on config.h but do want to track version changes. ++# Delete this file prior to any autoconf run where you want to rebuild ++# files to pick up a version string change; and leave it stale to ++# minimize rebuild time after unrelated changes to configure sources. ++# ++# It is probably wise to add these two files to .gitignore, so that you ++# don't accidentally commit either generated file. ++# ++# Use the following line in your configure.ac, so that $(VERSION) will ++# automatically be up-to-date each time configure is run (and note that ++# since configure.ac no longer includes a version string, Makefile rules ++# should not depend on configure.ac for version updates). ++# ++# AC_INIT([GNU project], ++# m4_esyscmd([build-aux/git-version-gen .tarball-version]), ++# [bug-project@example]) ++# ++# Then use the following lines in your Makefile.am, so that .version ++# will be present for dependencies, and so that .tarball-version will ++# exist in distribution tarballs. ++# ++# BUILT_SOURCES = $(top_srcdir)/.version ++# $(top_srcdir)/.version: ++# echo $(VERSION) > $@-t && mv $@-t $@ ++# dist-hook: ++# echo $(VERSION) > $(distdir)/.tarball-version ++ ++case $# in ++ 1|2) ;; ++ *) echo 1>&2 "Usage: $0 \$srcdir/.tarball-version" \ ++ '[TAG-NORMALIZATION-SED-SCRIPT]' ++ exit 1;; ++esac ++ ++tarball_version_file=$1 ++tag_sed_script="${2:-s/x/x/}" ++nl=' ++' ++ ++# Avoid meddling by environment variable of the same name. ++v= ++ ++# First see if there is a tarball-only version file. ++# then try "git describe", then default. 
++if test -f $tarball_version_file ++then ++ v=`cat $tarball_version_file` || exit 1 ++ case $v in ++ *$nl*) v= ;; # reject multi-line output ++ [0-9]*) ;; ++ *) v= ;; ++ esac ++ test -z "$v" \ ++ && echo "$0: WARNING: $tarball_version_file seems to be damaged" 1>&2 ++fi ++ ++if test -n "$v" ++then ++ : # use $v ++elif test -d .git \ ++ && v=`git describe --abbrev=4 --match='v*' HEAD 2>/dev/null \ ++ || git describe --abbrev=4 HEAD 2>/dev/null` \ ++ && v=`printf '%s\n' "$v" | sed "$tag_sed_script"` \ ++ && case $v in ++ v[0-9]*) ;; ++ *) (exit 1) ;; ++ esac ++then ++ # Is this a new git that lists number of commits since the last ++ # tag or the previous older version that did not? ++ # Newer: v6.10-77-g0f8faeb ++ # Older: v6.10-g0f8faeb ++ case $v in ++ *-*-*) : git describe is okay three part flavor ;; ++ *-*) ++ : git describe is older two part flavor ++ # Recreate the number of commits and rewrite such that the ++ # result is the same as if we were using the newer version ++ # of git describe. ++ vtag=`echo "$v" | sed 's/-.*//'` ++ numcommits=`git rev-list "$vtag"..HEAD | wc -l` ++ v=`echo "$v" | sed "s/\(.*\)-\(.*\)/\1-$numcommits-\2/"`; ++ ;; ++ esac ++ ++ # Change the first '-' to a '.', so version-comparing tools work properly. ++ # Remove the "g" in git describe's output string, to save a byte. ++ v=`echo "$v" | sed 's/-/./;s/\(.*\)-g/\1-/'`; ++else ++ v=UNKNOWN ++fi ++ ++v=`echo "$v" |sed 's/^v//'` ++ ++# Don't declare a version "dirty" merely because a time stamp has changed. ++git update-index --refresh > /dev/null 2>&1 ++ ++dirty=`sh -c 'git diff-index --name-only HEAD' 2>/dev/null` || dirty= ++case "$dirty" in ++ '') ;; ++ *) # Append the suffix only if there isn't one already. ++ case $v in ++ *-dirty) ;; ++ *) v="$v-dirty" ;; ++ esac ;; ++esac ++ ++# Omit the trailing newline, so that m4_esyscmd can use the result directly. 
++echo "$v" | tr -d "$nl" ++ ++# Local variables: ++# eval: (add-hook 'write-file-hooks 'time-stamp) ++# time-stamp-start: "scriptversion=" ++# time-stamp-format: "%:y-%02m-%02d.%02H" ++# time-stamp-time-zone: "UTC" ++# time-stamp-end: "; # UTC" ++# End: +diff --git a/slirp/meson.build b/slirp/meson.build +new file mode 100644 +index 0000000000..cb1396ad59 +--- /dev/null ++++ b/slirp/meson.build +@@ -0,0 +1,162 @@ ++project('libslirp', 'c', ++ version : '4.6.1', ++ license : 'BSD-3-Clause', ++ default_options : ['warning_level=1', 'c_std=gnu99'], ++ meson_version : '>= 0.50', ++) ++ ++version = meson.project_version() ++varr = version.split('.') ++major_version = varr[0] ++minor_version = varr[1] ++micro_version = varr[2] ++ ++conf = configuration_data() ++conf.set('SLIRP_MAJOR_VERSION', major_version) ++conf.set('SLIRP_MINOR_VERSION', minor_version) ++conf.set('SLIRP_MICRO_VERSION', micro_version) ++ ++full_version = run_command('build-aux/git-version-gen', ++ '@0@/.tarball-version'.format(meson.current_source_dir()), ++ check : true).stdout().strip() ++if full_version.startswith('UNKNOWN') ++ full_version = meson.project_version() ++elif not full_version.startswith(meson.project_version()) ++ error('meson.build project version @0@ does not match git-describe output @1@' ++ .format(meson.project_version(), full_version)) ++endif ++conf.set_quoted('SLIRP_VERSION_STRING', full_version + get_option('version_suffix')) ++ ++# libtool versioning - this applies to libslirp ++# ++# See http://sources.redhat.com/autobook/autobook/autobook_91.html#SEC91 for details ++# ++# - If interfaces have been changed or added, but binary compatibility ++# has been preserved, change: ++# CURRENT += 1 ++# REVISION = 0 ++# AGE += 1 ++# - If binary compatibility has been broken (eg removed or changed ++# interfaces), change: ++# CURRENT += 1 ++# REVISION = 0 ++# AGE = 0 ++# - If the interface is the same as the previous version, but bugs are ++# fixed, change: ++# REVISION += 1 
++lt_current = 3 ++lt_revision = 1 ++lt_age = 3 ++lt_version = '@0@.@1@.@2@'.format(lt_current - lt_age, lt_age, lt_revision) ++ ++host_system = host_machine.system() ++ ++glib_dep = dependency('glib-2.0') ++ ++cc = meson.get_compiler('c') ++ ++platform_deps = [] ++ ++if host_system == 'windows' ++ platform_deps += [ ++ cc.find_library('ws2_32'), ++ cc.find_library('iphlpapi') ++ ] ++elif host_system == 'darwin' ++ platform_deps += [ ++ cc.find_library('resolv') ++ ] ++endif ++ ++cargs = [ ++ '-DG_LOG_DOMAIN="Slirp"', ++] ++ ++if cc.check_header('valgrind/valgrind.h') ++ cargs += [ '-DHAVE_VALGRIND=1' ] ++endif ++ ++sources = [ ++ 'src/arp_table.c', ++ 'src/bootp.c', ++ 'src/cksum.c', ++ 'src/dhcpv6.c', ++ 'src/dnssearch.c', ++ 'src/if.c', ++ 'src/ip6_icmp.c', ++ 'src/ip6_input.c', ++ 'src/ip6_output.c', ++ 'src/ip_icmp.c', ++ 'src/ip_input.c', ++ 'src/ip_output.c', ++ 'src/mbuf.c', ++ 'src/misc.c', ++ 'src/ncsi.c', ++ 'src/ndp_table.c', ++ 'src/sbuf.c', ++ 'src/slirp.c', ++ 'src/socket.c', ++ 'src/state.c', ++ 'src/stream.c', ++ 'src/tcp_input.c', ++ 'src/tcp_output.c', ++ 'src/tcp_subr.c', ++ 'src/tcp_timer.c', ++ 'src/tftp.c', ++ 'src/udp.c', ++ 'src/udp6.c', ++ 'src/util.c', ++ 'src/version.c', ++ 'src/vmstate.c', ++] ++ ++mapfile = 'src/libslirp.map' ++vflag = [] ++vflag_test = '-Wl,--version-script,@0@/@1@'.format(meson.current_source_dir(), mapfile) ++if cc.has_link_argument(vflag_test) ++ vflag += vflag_test ++endif ++ ++install_devel = not meson.is_subproject() ++ ++configure_file( ++ input : 'src/libslirp-version.h.in', ++ output : 'libslirp-version.h', ++ install : install_devel, ++ install_dir : join_paths(get_option('includedir'), 'slirp'), ++ configuration : conf ++) ++ ++lib = library('slirp', sources, ++ version : lt_version, ++ c_args : cargs, ++ link_args : vflag, ++ link_depends : mapfile, ++ dependencies : [glib_dep, platform_deps], ++ install : install_devel or get_option('default_library') == 'shared', ++) ++ ++if install_devel ++ 
install_headers(['src/libslirp.h'], subdir : 'slirp') ++ ++ pkg = import('pkgconfig') ++ ++ pkg.generate( ++ version : version, ++ libraries : lib, ++ requires : [ ++ 'glib-2.0', ++ ], ++ name : 'slirp', ++ description : 'User-space network stack', ++ filebase : 'slirp', ++ subdirs : 'slirp', ++ ) ++else ++ if get_option('default_library') == 'both' ++ lib = lib.get_static_lib() ++ endif ++ libslirp_dep = declare_dependency( ++ include_directories: include_directories('.', 'src'), ++ link_with: lib) ++endif +diff --git a/slirp/meson_options.txt b/slirp/meson_options.txt +new file mode 100644 +index 0000000000..27e7c8059c +--- /dev/null ++++ b/slirp/meson_options.txt +@@ -0,0 +1,2 @@ ++option('version_suffix', type: 'string', value: '', ++ description: 'Suffix to append to SLIRP_VERSION_STRING') +diff --git a/slirp/src/arp_table.c b/slirp/src/arp_table.c +new file mode 100644 +index 0000000000..ba8c8a4eee +--- /dev/null ++++ b/slirp/src/arp_table.c +@@ -0,0 +1,94 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * ARP table ++ * ++ * Copyright (c) 2011 AdaCore ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "slirp.h" ++ ++#include ++ ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, ++ const uint8_t ethaddr[ETH_ALEN]) ++{ ++ const uint32_t broadcast_addr = ++ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; ++ ArpTable *arptbl = &slirp->arp_table; ++ int i; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ DEBUG_CALL("arp_table_add"); ++ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); ++ DEBUG_ARG("hw addr = %s", slirp_ether_ntoa(ethaddr, ethaddr_str, ++ sizeof(ethaddr_str))); ++ ++ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { ++ /* Do not register broadcast addresses */ ++ return; ++ } ++ ++ /* Search for an entry */ ++ for (i = 0; i < ARP_TABLE_SIZE; i++) { ++ if (arptbl->table[i].ar_sip == ip_addr) { ++ /* Update the entry */ ++ memcpy(arptbl->table[i].ar_sha, ethaddr, ETH_ALEN); ++ return; ++ } ++ } ++ ++ /* No entry found, create a new one */ ++ arptbl->table[arptbl->next_victim].ar_sip = ip_addr; ++ memcpy(arptbl->table[arptbl->next_victim].ar_sha, ethaddr, ETH_ALEN); ++ arptbl->next_victim = (arptbl->next_victim + 1) % ARP_TABLE_SIZE; ++} ++ ++bool arp_table_search(Slirp *slirp, uint32_t ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]) ++{ ++ const uint32_t broadcast_addr = ++ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; ++ ArpTable *arptbl = &slirp->arp_table; ++ int i; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ DEBUG_CALL("arp_table_search"); ++ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); ++ ++ /* If broadcast address */ ++ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { ++ /* return Ethernet broadcast address */ ++ memset(out_ethaddr, 0xff, 
ETH_ALEN); ++ return 1; ++ } ++ ++ for (i = 0; i < ARP_TABLE_SIZE; i++) { ++ if (arptbl->table[i].ar_sip == ip_addr) { ++ memcpy(out_ethaddr, arptbl->table[i].ar_sha, ETH_ALEN); ++ DEBUG_ARG("found hw addr = %s", ++ slirp_ether_ntoa(out_ethaddr, ethaddr_str, ++ sizeof(ethaddr_str))); ++ return 1; ++ } ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c +new file mode 100644 +index 0000000000..d78d61b44c +--- /dev/null ++++ b/slirp/src/bootp.c +@@ -0,0 +1,375 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * QEMU BOOTP/DHCP server ++ * ++ * Copyright (c) 2004 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#include "slirp.h" ++ ++#if defined(_WIN32) ++/* Windows ntohl() returns an u_long value. ++ * Add a type cast to match the format strings. 
*/ ++#define ntohl(n) ((uint32_t)ntohl(n)) ++#endif ++ ++/* XXX: only DHCP is supported */ ++ ++#define LEASE_TIME (24 * 3600) ++ ++static const uint8_t rfc1533_cookie[] = { RFC1533_COOKIE }; ++ ++#define DPRINTF(fmt, ...) DEBUG_CALL(fmt, ##__VA_ARGS__) ++ ++static BOOTPClient *get_new_addr(Slirp *slirp, struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ BOOTPClient *bc; ++ int i; ++ ++ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { ++ bc = &slirp->bootp_clients[i]; ++ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) ++ goto found; ++ } ++ return NULL; ++found: ++ bc = &slirp->bootp_clients[i]; ++ bc->allocated = 1; ++ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); ++ return bc; ++} ++ ++static BOOTPClient *request_addr(Slirp *slirp, const struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ uint32_t req_addr = ntohl(paddr->s_addr); ++ uint32_t dhcp_addr = ntohl(slirp->vdhcp_startaddr.s_addr); ++ BOOTPClient *bc; ++ ++ if (req_addr >= dhcp_addr && req_addr < (dhcp_addr + NB_BOOTP_CLIENTS)) { ++ bc = &slirp->bootp_clients[req_addr - dhcp_addr]; ++ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) { ++ bc->allocated = 1; ++ return bc; ++ } ++ } ++ return NULL; ++} ++ ++static BOOTPClient *find_addr(Slirp *slirp, struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ BOOTPClient *bc; ++ int i; ++ ++ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { ++ if (!memcmp(macaddr, slirp->bootp_clients[i].macaddr, 6)) ++ goto found; ++ } ++ return NULL; ++found: ++ bc = &slirp->bootp_clients[i]; ++ bc->allocated = 1; ++ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); ++ return bc; ++} ++ ++static void dhcp_decode(const struct bootp_t *bp, ++ const uint8_t *bp_end, ++ int *pmsg_type, ++ struct in_addr *preq_addr) ++{ ++ const uint8_t *p; ++ int len, tag; ++ ++ *pmsg_type = 0; ++ preq_addr->s_addr = htonl(0L); ++ ++ p = bp->bp_vend; ++ if (memcmp(p, rfc1533_cookie, 4) != 0) ++ return; ++ p += 4; ++ while (p < bp_end) { ++ tag = p[0]; ++ if (tag == 
RFC1533_PAD) { ++ p++; ++ } else if (tag == RFC1533_END) { ++ break; ++ } else { ++ p++; ++ if (p >= bp_end) ++ break; ++ len = *p++; ++ if (p + len > bp_end) { ++ break; ++ } ++ DPRINTF("dhcp: tag=%d len=%d\n", tag, len); ++ ++ switch (tag) { ++ case RFC2132_MSG_TYPE: ++ if (len >= 1) ++ *pmsg_type = p[0]; ++ break; ++ case RFC2132_REQ_ADDR: ++ if (len >= 4) { ++ memcpy(&(preq_addr->s_addr), p, 4); ++ } ++ break; ++ default: ++ break; ++ } ++ p += len; ++ } ++ } ++ if (*pmsg_type == DHCPREQUEST && preq_addr->s_addr == htonl(0L) && ++ bp->bp_ciaddr.s_addr) { ++ memcpy(&(preq_addr->s_addr), &bp->bp_ciaddr, 4); ++ } ++} ++ ++static void bootp_reply(Slirp *slirp, ++ const struct bootp_t *bp, ++ const uint8_t *bp_end) ++{ ++ BOOTPClient *bc = NULL; ++ struct mbuf *m; ++ struct bootp_t *rbp; ++ struct sockaddr_in saddr, daddr; ++ struct in_addr preq_addr; ++ int dhcp_msg_type, val; ++ uint8_t *q; ++ uint8_t *end; ++ uint8_t client_ethaddr[ETH_ALEN]; ++ ++ /* extract exact DHCP msg type */ ++ dhcp_decode(bp, bp_end, &dhcp_msg_type, &preq_addr); ++ DPRINTF("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type); ++ if (preq_addr.s_addr != htonl(0L)) ++ DPRINTF(" req_addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); ++ else { ++ DPRINTF("\n"); ++ } ++ ++ if (dhcp_msg_type == 0) ++ dhcp_msg_type = DHCPREQUEST; /* Force reply for old BOOTP clients */ ++ ++ if (dhcp_msg_type != DHCPDISCOVER && dhcp_msg_type != DHCPREQUEST) ++ return; ++ ++ /* Get client's hardware address from bootp request */ ++ memcpy(client_ethaddr, bp->bp_hwaddr, ETH_ALEN); ++ ++ m = m_get(slirp); ++ if (!m) { ++ return; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m_inc(m, sizeof(struct bootp_t) + DHCP_OPT_LEN); ++ rbp = (struct bootp_t *)m->m_data; ++ m->m_data += sizeof(struct udpiphdr); ++ memset(rbp, 0, sizeof(struct bootp_t) + DHCP_OPT_LEN); ++ ++ if (dhcp_msg_type == DHCPDISCOVER) { ++ if (preq_addr.s_addr != htonl(0L)) { ++ bc = request_addr(slirp, &preq_addr, client_ethaddr); ++ if (bc) { ++ 
daddr.sin_addr = preq_addr; ++ } ++ } ++ if (!bc) { ++ new_addr: ++ bc = get_new_addr(slirp, &daddr.sin_addr, client_ethaddr); ++ if (!bc) { ++ DPRINTF("no address left\n"); ++ return; ++ } ++ } ++ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); ++ } else if (preq_addr.s_addr != htonl(0L)) { ++ bc = request_addr(slirp, &preq_addr, client_ethaddr); ++ if (bc) { ++ daddr.sin_addr = preq_addr; ++ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); ++ } else { ++ /* DHCPNAKs should be sent to broadcast */ ++ daddr.sin_addr.s_addr = 0xffffffff; ++ } ++ } else { ++ bc = find_addr(slirp, &daddr.sin_addr, bp->bp_hwaddr); ++ if (!bc) { ++ /* if never assigned, behaves as if it was already ++ assigned (windows fix because it remembers its address) */ ++ goto new_addr; ++ } ++ } ++ ++ /* Update ARP table for this IP address */ ++ arp_table_add(slirp, daddr.sin_addr.s_addr, client_ethaddr); ++ ++ saddr.sin_addr = slirp->vhost_addr; ++ saddr.sin_port = htons(BOOTP_SERVER); ++ ++ daddr.sin_port = htons(BOOTP_CLIENT); ++ ++ rbp->bp_op = BOOTP_REPLY; ++ rbp->bp_xid = bp->bp_xid; ++ rbp->bp_htype = 1; ++ rbp->bp_hlen = 6; ++ memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, ETH_ALEN); ++ ++ rbp->bp_yiaddr = daddr.sin_addr; /* Client IP address */ ++ rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */ ++ ++ q = rbp->bp_vend; ++ end = rbp->bp_vend + DHCP_OPT_LEN; ++ memcpy(q, rfc1533_cookie, 4); ++ q += 4; ++ ++ if (bc) { ++ DPRINTF("%s addr=%08" PRIx32 "\n", ++ (dhcp_msg_type == DHCPDISCOVER) ? 
"offered" : "ack'ed", ++ ntohl(daddr.sin_addr.s_addr)); ++ ++ if (dhcp_msg_type == DHCPDISCOVER) { ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPOFFER; ++ } else /* DHCPREQUEST */ { ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPACK; ++ } ++ ++ if (slirp->bootp_filename) { ++ g_assert(strlen(slirp->bootp_filename) < sizeof(rbp->bp_file)); ++ strcpy(rbp->bp_file, slirp->bootp_filename); ++ } ++ ++ *q++ = RFC2132_SRV_ID; ++ *q++ = 4; ++ memcpy(q, &saddr.sin_addr, 4); ++ q += 4; ++ ++ *q++ = RFC1533_NETMASK; ++ *q++ = 4; ++ memcpy(q, &slirp->vnetwork_mask, 4); ++ q += 4; ++ ++ if (!slirp->restricted) { ++ *q++ = RFC1533_GATEWAY; ++ *q++ = 4; ++ memcpy(q, &saddr.sin_addr, 4); ++ q += 4; ++ ++ *q++ = RFC1533_DNS; ++ *q++ = 4; ++ memcpy(q, &slirp->vnameserver_addr, 4); ++ q += 4; ++ } ++ ++ *q++ = RFC2132_LEASE_TIME; ++ *q++ = 4; ++ val = htonl(LEASE_TIME); ++ memcpy(q, &val, 4); ++ q += 4; ++ ++ if (*slirp->client_hostname) { ++ val = strlen(slirp->client_hostname); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting host name option."); ++ } else { ++ *q++ = RFC1533_HOSTNAME; ++ *q++ = val; ++ memcpy(q, slirp->client_hostname, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->vdomainname) { ++ val = strlen(slirp->vdomainname); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting domain name option."); ++ } else { ++ *q++ = RFC1533_DOMAINNAME; ++ *q++ = val; ++ memcpy(q, slirp->vdomainname, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->tftp_server_name) { ++ val = strlen(slirp->tftp_server_name); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting tftp-server-name option."); ++ } else { ++ *q++ = RFC2132_TFTP_SERVER_NAME; ++ *q++ = val; ++ memcpy(q, slirp->tftp_server_name, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->vdnssearch) { ++ val = slirp->vdnssearch_len; ++ if (q + val >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting domain-search 
option."); ++ } else { ++ memcpy(q, slirp->vdnssearch, val); ++ q += val; ++ } ++ } ++ } else { ++ static const char nak_msg[] = "requested address not available"; ++ ++ DPRINTF("nak'ed addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); ++ ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPNAK; ++ ++ *q++ = RFC2132_MESSAGE; ++ *q++ = sizeof(nak_msg) - 1; ++ memcpy(q, nak_msg, sizeof(nak_msg) - 1); ++ q += sizeof(nak_msg) - 1; ++ } ++ assert(q < end); ++ *q++ = RFC1533_END; ++ ++ daddr.sin_addr.s_addr = 0xffffffffu; ++ ++ assert(q <= end); ++ ++ m->m_len = sizeof(struct bootp_t) + (end - rbp->bp_vend) - sizeof(struct ip) - sizeof(struct udphdr); ++ udp_output(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); ++} ++ ++void bootp_input(struct mbuf *m) ++{ ++ struct bootp_t *bp = mtod_check(m, sizeof(struct bootp_t)); ++ ++ if (bp && bp->bp_op == BOOTP_REQUEST) { ++ bootp_reply(m->slirp, bp, m_end(m)); ++ } ++} +diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h +new file mode 100644 +index 0000000000..31ce5fd33f +--- /dev/null ++++ b/slirp/src/bootp.h +@@ -0,0 +1,129 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* bootp/dhcp defines */ ++ ++#ifndef SLIRP_BOOTP_H ++#define SLIRP_BOOTP_H ++ ++#define BOOTP_SERVER 67 ++#define BOOTP_CLIENT 68 ++ ++#define BOOTP_REQUEST 1 ++#define BOOTP_REPLY 2 ++ ++#define RFC1533_COOKIE 99, 130, 83, 99 ++#define RFC1533_PAD 0 ++#define RFC1533_NETMASK 1 ++#define RFC1533_TIMEOFFSET 2 ++#define RFC1533_GATEWAY 3 ++#define RFC1533_TIMESERVER 4 ++#define RFC1533_IEN116NS 5 ++#define RFC1533_DNS 6 ++#define RFC1533_LOGSERVER 7 ++#define RFC1533_COOKIESERVER 8 ++#define RFC1533_LPRSERVER 9 ++#define RFC1533_IMPRESSSERVER 10 ++#define RFC1533_RESOURCESERVER 11 ++#define RFC1533_HOSTNAME 12 ++#define RFC1533_BOOTFILESIZE 13 ++#define RFC1533_MERITDUMPFILE 14 ++#define RFC1533_DOMAINNAME 15 ++#define RFC1533_SWAPSERVER 16 ++#define RFC1533_ROOTPATH 17 ++#define RFC1533_EXTENSIONPATH 18 ++#define RFC1533_IPFORWARDING 19 ++#define 
RFC1533_IPSOURCEROUTING 20 ++#define RFC1533_IPPOLICYFILTER 21 ++#define RFC1533_IPMAXREASSEMBLY 22 ++#define RFC1533_IPTTL 23 ++#define RFC1533_IPMTU 24 ++#define RFC1533_IPMTUPLATEAU 25 ++#define RFC1533_INTMTU 26 ++#define RFC1533_INTLOCALSUBNETS 27 ++#define RFC1533_INTBROADCAST 28 ++#define RFC1533_INTICMPDISCOVER 29 ++#define RFC1533_INTICMPRESPOND 30 ++#define RFC1533_INTROUTEDISCOVER 31 ++#define RFC1533_INTROUTESOLICIT 32 ++#define RFC1533_INTSTATICROUTES 33 ++#define RFC1533_LLTRAILERENCAP 34 ++#define RFC1533_LLARPCACHETMO 35 ++#define RFC1533_LLETHERNETENCAP 36 ++#define RFC1533_TCPTTL 37 ++#define RFC1533_TCPKEEPALIVETMO 38 ++#define RFC1533_TCPKEEPALIVEGB 39 ++#define RFC1533_NISDOMAIN 40 ++#define RFC1533_NISSERVER 41 ++#define RFC1533_NTPSERVER 42 ++#define RFC1533_VENDOR 43 ++#define RFC1533_NBNS 44 ++#define RFC1533_NBDD 45 ++#define RFC1533_NBNT 46 ++#define RFC1533_NBSCOPE 47 ++#define RFC1533_XFS 48 ++#define RFC1533_XDM 49 ++ ++#define RFC2132_REQ_ADDR 50 ++#define RFC2132_LEASE_TIME 51 ++#define RFC2132_MSG_TYPE 53 ++#define RFC2132_SRV_ID 54 ++#define RFC2132_PARAM_LIST 55 ++#define RFC2132_MESSAGE 56 ++#define RFC2132_MAX_SIZE 57 ++#define RFC2132_RENEWAL_TIME 58 ++#define RFC2132_REBIND_TIME 59 ++#define RFC2132_TFTP_SERVER_NAME 66 ++ ++#define DHCPDISCOVER 1 ++#define DHCPOFFER 2 ++#define DHCPREQUEST 3 ++#define DHCPACK 5 ++#define DHCPNAK 6 ++ ++#define RFC1533_VENDOR_MAJOR 0 ++#define RFC1533_VENDOR_MINOR 0 ++ ++#define RFC1533_VENDOR_MAGIC 128 ++#define RFC1533_VENDOR_ADDPARM 129 ++#define RFC1533_VENDOR_ETHDEV 130 ++#define RFC1533_VENDOR_HOWTO 132 ++#define RFC1533_VENDOR_MNUOPTS 160 ++#define RFC1533_VENDOR_SELECTION 176 ++#define RFC1533_VENDOR_MOTD 184 ++#define RFC1533_VENDOR_NUMOFMOTD 8 ++#define RFC1533_VENDOR_IMG 192 ++#define RFC1533_VENDOR_NUMOFIMG 16 ++ ++#define RFC1533_END 255 ++#define BOOTP_VENDOR_LEN 64 ++#define DHCP_OPT_LEN 312 ++ ++struct bootp_t { ++ struct ip ip; ++ struct udphdr udp; ++ uint8_t bp_op; ++ uint8_t 
bp_htype; ++ uint8_t bp_hlen; ++ uint8_t bp_hops; ++ uint32_t bp_xid; ++ uint16_t bp_secs; ++ uint16_t unused; ++ struct in_addr bp_ciaddr; ++ struct in_addr bp_yiaddr; ++ struct in_addr bp_siaddr; ++ struct in_addr bp_giaddr; ++ uint8_t bp_hwaddr[16]; ++ uint8_t bp_sname[64]; ++ char bp_file[128]; ++ uint8_t bp_vend[]; ++}; ++ ++typedef struct { ++ uint16_t allocated; ++ uint8_t macaddr[6]; ++} BOOTPClient; ++ ++#define NB_BOOTP_CLIENTS 16 ++ ++void bootp_input(struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/cksum.c b/slirp/src/cksum.c +new file mode 100644 +index 0000000000..b1cb97b7e1 +--- /dev/null ++++ b/slirp/src/cksum.c +@@ -0,0 +1,179 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1988, 1992, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 ++ * in_cksum.c,v 1.2 1994/08/02 07:48:16 davidg Exp ++ */ ++ ++#include "slirp.h" ++ ++/* ++ * Checksum routine for Internet Protocol family headers (Portable Version). ++ * ++ * This routine is very heavily used in the network ++ * code and should be modified for each CPU to be as fast as possible. ++ * ++ * XXX Since we will never span more than 1 mbuf, we can optimise this ++ */ ++ ++#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) ++#define REDUCE \ ++ { \ ++ l_util.l = sum; \ ++ sum = l_util.s[0] + l_util.s[1]; \ ++ ADDCARRY(sum); \ ++ } ++ ++int cksum(struct mbuf *m, int len) ++{ ++ register uint16_t *w; ++ register int sum = 0; ++ register int mlen = 0; ++ int byte_swapped = 0; ++ ++ union { ++ uint8_t c[2]; ++ uint16_t s; ++ } s_util; ++ union { ++ uint16_t s[2]; ++ uint32_t l; ++ } l_util; ++ ++ if (m->m_len == 0) ++ goto cont; ++ w = mtod(m, uint16_t *); ++ ++ mlen = m->m_len; ++ ++ if (len < mlen) ++ mlen = len; ++ len -= mlen; ++ /* ++ * Force to even boundary. ++ */ ++ if ((1 & (uintptr_t)w) && (mlen > 0)) { ++ REDUCE; ++ sum <<= 8; ++ s_util.c[0] = *(uint8_t *)w; ++ w = (uint16_t *)((int8_t *)w + 1); ++ mlen--; ++ byte_swapped = 1; ++ } ++ /* ++ * Unroll the loop to make overhead from ++ * branches &c small. 
++ */ ++ while ((mlen -= 32) >= 0) { ++ sum += w[0]; ++ sum += w[1]; ++ sum += w[2]; ++ sum += w[3]; ++ sum += w[4]; ++ sum += w[5]; ++ sum += w[6]; ++ sum += w[7]; ++ sum += w[8]; ++ sum += w[9]; ++ sum += w[10]; ++ sum += w[11]; ++ sum += w[12]; ++ sum += w[13]; ++ sum += w[14]; ++ sum += w[15]; ++ w += 16; ++ } ++ mlen += 32; ++ while ((mlen -= 8) >= 0) { ++ sum += w[0]; ++ sum += w[1]; ++ sum += w[2]; ++ sum += w[3]; ++ w += 4; ++ } ++ mlen += 8; ++ if (mlen == 0 && byte_swapped == 0) ++ goto cont; ++ REDUCE; ++ while ((mlen -= 2) >= 0) { ++ sum += *w++; ++ } ++ ++ if (byte_swapped) { ++ REDUCE; ++ sum <<= 8; ++ if (mlen == -1) { ++ s_util.c[1] = *(uint8_t *)w; ++ sum += s_util.s; ++ mlen = 0; ++ } else ++ ++ mlen = -1; ++ } else if (mlen == -1) ++ s_util.c[0] = *(uint8_t *)w; ++ ++cont: ++ if (len) { ++ DEBUG_ERROR("cksum: out of data"); ++ DEBUG_ERROR(" len = %d", len); ++ } ++ if (mlen == -1) { ++ /* The last mbuf has odd # of bytes. Follow the ++ standard (the odd byte may be shifted left by 8 bits ++ or not as determined by endian-ness of the machine) */ ++ s_util.c[1] = 0; ++ sum += s_util.s; ++ } ++ REDUCE; ++ return (~sum & 0xffff); ++} ++ ++int ip6_cksum(struct mbuf *m) ++{ ++ /* TODO: Optimize this by being able to pass the ip6_pseudohdr to cksum ++ * separately from the mbuf */ ++ struct ip6 save_ip, *ip = mtod(m, struct ip6 *); ++ struct ip6_pseudohdr *ih = mtod(m, struct ip6_pseudohdr *); ++ int sum; ++ ++ save_ip = *ip; ++ ++ ih->ih_src = save_ip.ip_src; ++ ih->ih_dst = save_ip.ip_dst; ++ ih->ih_pl = htonl((uint32_t)ntohs(save_ip.ip_pl)); ++ ih->ih_zero_hi = 0; ++ ih->ih_zero_lo = 0; ++ ih->ih_nh = save_ip.ip_nh; ++ ++ sum = cksum(m, ((int)sizeof(struct ip6_pseudohdr)) + ntohl(ih->ih_pl)); ++ ++ *ip = save_ip; ++ ++ return sum; ++} +diff --git a/slirp/src/debug.h b/slirp/src/debug.h +new file mode 100644 +index 0000000000..0f9f3eff3f +--- /dev/null ++++ b/slirp/src/debug.h +@@ -0,0 +1,59 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * 
Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef DEBUG_H_ ++#define DEBUG_H_ ++ ++#define DBG_CALL (1 << 0) ++#define DBG_MISC (1 << 1) ++#define DBG_ERROR (1 << 2) ++#define DBG_TFTP (1 << 3) ++#define DBG_VERBOSE_CALL (1 << 4) ++ ++extern int slirp_debug; ++ ++#define DEBUG_CALL(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ ++ g_debug(fmt "...", ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_VERBOSE_CALL(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_VERBOSE_CALL)) { \ ++ g_debug(fmt "...", ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_ARG(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ ++ g_debug(" " fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_MISC(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_MISC)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_ERROR(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_ERROR)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_TFTP(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_TFTP)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#endif /* DEBUG_H_ */ +diff --git a/slirp/src/dhcpv6.c b/slirp/src/dhcpv6.c +new file mode 100644 +index 0000000000..77b451b910 +--- /dev/null ++++ b/slirp/src/dhcpv6.c +@@ -0,0 +1,224 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * SLIRP stateless DHCPv6 ++ * ++ * We only support stateless DHCPv6, e.g. for network booting. ++ * See RFC 3315, RFC 3736, RFC 3646 and RFC 5970 for details. ++ * ++ * Copyright 2016 Thomas Huth, Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. 
Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#include "slirp.h" ++#include "dhcpv6.h" ++ ++/* DHCPv6 message types */ ++#define MSGTYPE_REPLY 7 ++#define MSGTYPE_INFO_REQUEST 11 ++ ++/* DHCPv6 option types */ ++#define OPTION_CLIENTID 1 ++#define OPTION_IAADDR 5 ++#define OPTION_ORO 6 ++#define OPTION_DNS_SERVERS 23 ++#define OPTION_BOOTFILE_URL 59 ++ ++struct requested_infos { ++ uint8_t *client_id; ++ int client_id_len; ++ bool want_dns; ++ bool want_boot_url; ++}; ++ ++/** ++ * Analyze the info request message sent by the client to see what data it ++ * provided and what it wants to have. The information is gathered in the ++ * "requested_infos" struct. 
Note that client_id (if provided) points into ++ * the odata region, thus the caller must keep odata valid as long as it ++ * needs to access the requested_infos struct. ++ */ ++static int dhcpv6_parse_info_request(Slirp *slirp, uint8_t *odata, int olen, ++ struct requested_infos *ri) ++{ ++ int i, req_opt; ++ ++ while (olen > 4) { ++ /* Parse one option */ ++ int option = odata[0] << 8 | odata[1]; ++ int len = odata[2] << 8 | odata[3]; ++ ++ if (len + 4 > olen) { ++ slirp->cb->guest_error("Guest sent bad DHCPv6 packet!", ++ slirp->opaque); ++ return -E2BIG; ++ } ++ ++ switch (option) { ++ case OPTION_IAADDR: ++ /* According to RFC3315, we must discard requests with IA option */ ++ return -EINVAL; ++ case OPTION_CLIENTID: ++ if (len > 256) { ++ /* Avoid very long IDs which could cause problems later */ ++ return -E2BIG; ++ } ++ ri->client_id = odata + 4; ++ ri->client_id_len = len; ++ break; ++ case OPTION_ORO: /* Option request option */ ++ if (len & 1) { ++ return -EINVAL; ++ } ++ /* Check which options the client wants to have */ ++ for (i = 0; i < len; i += 2) { ++ req_opt = odata[4 + i] << 8 | odata[4 + i + 1]; ++ switch (req_opt) { ++ case OPTION_DNS_SERVERS: ++ ri->want_dns = true; ++ break; ++ case OPTION_BOOTFILE_URL: ++ ri->want_boot_url = true; ++ break; ++ default: ++ DEBUG_MISC("dhcpv6: Unsupported option request %d", ++ req_opt); ++ } ++ } ++ break; ++ default: ++ DEBUG_MISC("dhcpv6 info req: Unsupported option %d, len=%d", option, ++ len); ++ } ++ ++ odata += len + 4; ++ olen -= len + 4; ++ } ++ ++ return 0; ++} ++ ++ ++/** ++ * Handle information request messages ++ */ ++static void dhcpv6_info_request(Slirp *slirp, struct sockaddr_in6 *srcsas, ++ uint32_t xid, uint8_t *odata, int olen) ++{ ++ struct requested_infos ri = { NULL }; ++ struct sockaddr_in6 sa6, da6; ++ struct mbuf *m; ++ uint8_t *resp; ++ ++ if (dhcpv6_parse_info_request(slirp, odata, olen, &ri) < 0) { ++ return; ++ } ++ ++ m = m_get(slirp); ++ if (!m) { ++ return; ++ } ++ 
memset(m->m_data, 0, m->m_size); ++ m->m_data += IF_MAXLINKHDR; ++ resp = (uint8_t *)m->m_data + sizeof(struct ip6) + sizeof(struct udphdr); ++ ++ /* Fill in response */ ++ *resp++ = MSGTYPE_REPLY; ++ *resp++ = (uint8_t)(xid >> 16); ++ *resp++ = (uint8_t)(xid >> 8); ++ *resp++ = (uint8_t)xid; ++ ++ if (ri.client_id) { ++ *resp++ = OPTION_CLIENTID >> 8; /* option-code high byte */ ++ *resp++ = OPTION_CLIENTID; /* option-code low byte */ ++ *resp++ = ri.client_id_len >> 8; /* option-len high byte */ ++ *resp++ = ri.client_id_len; /* option-len low byte */ ++ memcpy(resp, ri.client_id, ri.client_id_len); ++ resp += ri.client_id_len; ++ } ++ if (ri.want_dns) { ++ *resp++ = OPTION_DNS_SERVERS >> 8; /* option-code high byte */ ++ *resp++ = OPTION_DNS_SERVERS; /* option-code low byte */ ++ *resp++ = 0; /* option-len high byte */ ++ *resp++ = 16; /* option-len low byte */ ++ memcpy(resp, &slirp->vnameserver_addr6, 16); ++ resp += 16; ++ } ++ if (ri.want_boot_url) { ++ uint8_t *sa = slirp->vhost_addr6.s6_addr; ++ int slen, smaxlen; ++ ++ *resp++ = OPTION_BOOTFILE_URL >> 8; /* option-code high byte */ ++ *resp++ = OPTION_BOOTFILE_URL; /* option-code low byte */ ++ smaxlen = (uint8_t *)m->m_data + slirp->if_mtu - (resp + 2); ++ slen = slirp_fmt((char *)resp + 2, smaxlen, ++ "tftp://[%02x%02x:%02x%02x:%02x%02x:%02x%02x:" ++ "%02x%02x:%02x%02x:%02x%02x:%02x%02x]/%s", ++ sa[0], sa[1], sa[2], sa[3], sa[4], sa[5], sa[6], sa[7], ++ sa[8], sa[9], sa[10], sa[11], sa[12], sa[13], sa[14], ++ sa[15], slirp->bootp_filename); ++ *resp++ = slen >> 8; /* option-len high byte */ ++ *resp++ = slen; /* option-len low byte */ ++ resp += slen; ++ } ++ ++ sa6.sin6_addr = slirp->vhost_addr6; ++ sa6.sin6_port = DHCPV6_SERVER_PORT; ++ da6.sin6_addr = srcsas->sin6_addr; ++ da6.sin6_port = srcsas->sin6_port; ++ m->m_data += sizeof(struct ip6) + sizeof(struct udphdr); ++ m->m_len = resp - (uint8_t *)m->m_data; ++ udp6_output(NULL, m, &sa6, &da6); ++} ++ ++/** ++ * Handle DHCPv6 messages sent by the 
client ++ */ ++void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m) ++{ ++ uint8_t *data = (uint8_t *)m->m_data + sizeof(struct udphdr); ++ int data_len = m->m_len - sizeof(struct udphdr); ++ uint32_t xid; ++ ++ if (data_len < 4) { ++ return; ++ } ++ ++ xid = ntohl(*(uint32_t *)data) & 0xffffff; ++ ++ switch (data[0]) { ++ case MSGTYPE_INFO_REQUEST: ++ dhcpv6_info_request(m->slirp, srcsas, xid, &data[4], data_len - 4); ++ break; ++ default: ++ DEBUG_MISC("dhcpv6_input: Unsupported message type 0x%x", data[0]); ++ } ++} +diff --git a/slirp/src/dhcpv6.h b/slirp/src/dhcpv6.h +new file mode 100644 +index 0000000000..d12c49b36c +--- /dev/null ++++ b/slirp/src/dhcpv6.h +@@ -0,0 +1,68 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Definitions and prototypes for SLIRP stateless DHCPv6 ++ * ++ * Copyright 2016 Thomas Huth, Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#ifndef SLIRP_DHCPV6_H ++#define SLIRP_DHCPV6_H ++ ++#define DHCPV6_SERVER_PORT 547 ++ ++#define ALLDHCP_MULTICAST \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01, \ ++ 0x00, \ ++ 0x02 \ ++ } \ ++ } ++ ++#define in6_dhcp_multicast(a) in6_equal(a, &(struct in6_addr)ALLDHCP_MULTICAST) ++ ++void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/dnssearch.c b/slirp/src/dnssearch.c +new file mode 100644 +index 0000000000..55497e860e +--- /dev/null ++++ b/slirp/src/dnssearch.c +@@ -0,0 +1,306 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Domain search option for DHCP (RFC 3397) ++ * ++ * Copyright (c) 2012 Klaus Stengel ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "slirp.h" ++ ++static const uint8_t RFC3397_OPT_DOMAIN_SEARCH = 119; ++static const uint8_t MAX_OPT_LEN = 255; ++static const uint8_t OPT_HEADER_LEN = 2; ++static const uint8_t REFERENCE_LEN = 2; ++ ++struct compact_domain; ++ ++typedef struct compact_domain { ++ struct compact_domain *self; ++ struct compact_domain *refdom; ++ uint8_t *labels; ++ size_t len; ++ size_t common_octets; ++} CompactDomain; ++ ++static size_t domain_suffix_diffoff(const CompactDomain *a, ++ const CompactDomain *b) ++{ ++ size_t la = a->len, lb = b->len; ++ uint8_t *da = a->labels + la, *db = b->labels + lb; ++ size_t i, lm = (la < lb) ? 
la : lb; ++ ++ for (i = 0; i < lm; i++) { ++ da--; ++ db--; ++ if (*da != *db) { ++ break; ++ } ++ } ++ return i; ++} ++ ++static int domain_suffix_ord(const void *cva, const void *cvb) ++{ ++ const CompactDomain *a = cva, *b = cvb; ++ size_t la = a->len, lb = b->len; ++ size_t doff = domain_suffix_diffoff(a, b); ++ uint8_t ca = a->labels[la - doff]; ++ uint8_t cb = b->labels[lb - doff]; ++ ++ if (ca < cb) { ++ return -1; ++ } ++ if (ca > cb) { ++ return 1; ++ } ++ if (la < lb) { ++ return -1; ++ } ++ if (la > lb) { ++ return 1; ++ } ++ return 0; ++} ++ ++static size_t domain_common_label(CompactDomain *a, CompactDomain *b) ++{ ++ size_t res, doff = domain_suffix_diffoff(a, b); ++ uint8_t *first_eq_pos = a->labels + (a->len - doff); ++ uint8_t *label = a->labels; ++ ++ while (*label && label < first_eq_pos) { ++ label += *label + 1; ++ } ++ res = a->len - (label - a->labels); ++ /* only report if it can help to reduce the packet size */ ++ return (res > REFERENCE_LEN) ? res : 0; ++} ++ ++static void domain_fixup_order(CompactDomain *cd, size_t n) ++{ ++ size_t i; ++ ++ for (i = 0; i < n; i++) { ++ CompactDomain *cur = cd + i, *next = cd[i].self; ++ ++ while (!cur->common_octets) { ++ CompactDomain *tmp = next->self; /* backup target value */ ++ ++ next->self = cur; ++ cur->common_octets++; ++ ++ cur = next; ++ next = tmp; ++ } ++ } ++} ++ ++static void domain_mklabels(CompactDomain *cd, const char *input) ++{ ++ uint8_t *len_marker = cd->labels; ++ uint8_t *output = len_marker; /* pre-incremented */ ++ const char *in = input; ++ char cur_chr; ++ size_t len = 0; ++ ++ if (cd->len == 0) { ++ goto fail; ++ } ++ cd->len++; ++ ++ do { ++ cur_chr = *in++; ++ if (cur_chr == '.' 
|| cur_chr == '\0') { ++ len = output - len_marker; ++ if ((len == 0 && cur_chr == '.') || len >= 64) { ++ goto fail; ++ } ++ *len_marker = len; ++ ++ output++; ++ len_marker = output; ++ } else { ++ output++; ++ *output = cur_chr; ++ } ++ } while (cur_chr != '\0'); ++ ++ /* ensure proper zero-termination */ ++ if (len != 0) { ++ *len_marker = 0; ++ cd->len++; ++ } ++ return; ++ ++fail: ++ g_warning("failed to parse domain name '%s'\n", input); ++ cd->len = 0; ++} ++ ++static void domain_mkxrefs(CompactDomain *doms, CompactDomain *last, ++ size_t depth) ++{ ++ CompactDomain *i = doms, *target = doms; ++ ++ do { ++ if (i->labels < target->labels) { ++ target = i; ++ } ++ } while (i++ != last); ++ ++ for (i = doms; i != last; i++) { ++ CompactDomain *group_last; ++ size_t next_depth; ++ ++ if (i->common_octets == depth) { ++ continue; ++ } ++ ++ next_depth = -1; ++ for (group_last = i; group_last != last; group_last++) { ++ size_t co = group_last->common_octets; ++ if (co <= depth) { ++ break; ++ } ++ if (co < next_depth) { ++ next_depth = co; ++ } ++ } ++ domain_mkxrefs(i, group_last, next_depth); ++ ++ i = group_last; ++ if (i == last) { ++ break; ++ } ++ } ++ ++ if (depth == 0) { ++ return; ++ } ++ ++ i = doms; ++ do { ++ if (i != target && i->refdom == NULL) { ++ i->refdom = target; ++ i->common_octets = depth; ++ } ++ } while (i++ != last); ++} ++ ++static size_t domain_compactify(CompactDomain *domains, size_t n) ++{ ++ uint8_t *start = domains->self->labels, *outptr = start; ++ size_t i; ++ ++ for (i = 0; i < n; i++) { ++ CompactDomain *cd = domains[i].self; ++ CompactDomain *rd = cd->refdom; ++ ++ if (rd != NULL) { ++ size_t moff = (rd->labels - start) + (rd->len - cd->common_octets); ++ if (moff < 0x3FFFu) { ++ cd->len -= cd->common_octets - 2; ++ cd->labels[cd->len - 1] = moff & 0xFFu; ++ cd->labels[cd->len - 2] = 0xC0u | (moff >> 8); ++ } ++ } ++ ++ if (cd->labels != outptr) { ++ memmove(outptr, cd->labels, cd->len); ++ cd->labels = outptr; ++ } ++ outptr 
+= cd->len; ++ } ++ return outptr - start; ++} ++ ++int translate_dnssearch(Slirp *s, const char **names) ++{ ++ size_t blocks, bsrc_start, bsrc_end, bdst_start; ++ size_t i, num_domains, memreq = 0; ++ uint8_t *result = NULL, *outptr; ++ CompactDomain *domains = NULL; ++ ++ num_domains = g_strv_length((GStrv)(void *)names); ++ if (num_domains == 0) { ++ return -2; ++ } ++ ++ domains = g_malloc(num_domains * sizeof(*domains)); ++ ++ for (i = 0; i < num_domains; i++) { ++ size_t nlen = strlen(names[i]); ++ memreq += nlen + 2; /* 1 zero octet + 1 label length octet */ ++ domains[i].self = domains + i; ++ domains[i].len = nlen; ++ domains[i].common_octets = 0; ++ domains[i].refdom = NULL; ++ } ++ ++ /* reserve extra 2 header bytes for each 255 bytes of output */ ++ memreq += DIV_ROUND_UP(memreq, MAX_OPT_LEN) * OPT_HEADER_LEN; ++ result = g_malloc(memreq * sizeof(*result)); ++ ++ outptr = result; ++ for (i = 0; i < num_domains; i++) { ++ domains[i].labels = outptr; ++ domain_mklabels(domains + i, names[i]); ++ outptr += domains[i].len; ++ } ++ ++ if (outptr == result) { ++ g_free(domains); ++ g_free(result); ++ return -1; ++ } ++ ++ qsort(domains, num_domains, sizeof(*domains), domain_suffix_ord); ++ domain_fixup_order(domains, num_domains); ++ ++ for (i = 1; i < num_domains; i++) { ++ size_t cl = domain_common_label(domains + i - 1, domains + i); ++ domains[i - 1].common_octets = cl; ++ } ++ ++ domain_mkxrefs(domains, domains + num_domains - 1, 0); ++ memreq = domain_compactify(domains, num_domains); ++ ++ blocks = DIV_ROUND_UP(memreq, MAX_OPT_LEN); ++ bsrc_end = memreq; ++ bsrc_start = (blocks - 1) * MAX_OPT_LEN; ++ bdst_start = bsrc_start + blocks * OPT_HEADER_LEN; ++ memreq += blocks * OPT_HEADER_LEN; ++ ++ while (blocks--) { ++ size_t len = bsrc_end - bsrc_start; ++ memmove(result + bdst_start, result + bsrc_start, len); ++ result[bdst_start - 2] = RFC3397_OPT_DOMAIN_SEARCH; ++ result[bdst_start - 1] = len; ++ bsrc_end = bsrc_start; ++ bsrc_start -= MAX_OPT_LEN; 
++ bdst_start -= MAX_OPT_LEN + OPT_HEADER_LEN; ++ } ++ ++ g_free(domains); ++ s->vdnssearch = result; ++ s->vdnssearch_len = memreq; ++ return 0; ++} +diff --git a/slirp/src/if.c b/slirp/src/if.c +new file mode 100644 +index 0000000000..9a1eec97b8 +--- /dev/null ++++ b/slirp/src/if.c +@@ -0,0 +1,215 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++static void ifs_insque(struct mbuf *ifm, struct mbuf *ifmhead) ++{ ++ ifm->ifs_next = ifmhead->ifs_next; ++ ifmhead->ifs_next = ifm; ++ ifm->ifs_prev = ifmhead; ++ ifm->ifs_next->ifs_prev = ifm; ++} ++ ++static void ifs_remque(struct mbuf *ifm) ++{ ++ ifm->ifs_prev->ifs_next = ifm->ifs_next; ++ ifm->ifs_next->ifs_prev = ifm->ifs_prev; ++} ++ ++void if_init(Slirp *slirp) ++{ ++ slirp->if_fastq.qh_link = slirp->if_fastq.qh_rlink = &slirp->if_fastq; ++ slirp->if_batchq.qh_link = slirp->if_batchq.qh_rlink = &slirp->if_batchq; ++} ++ ++/* ++ * if_output: Queue packet into an output queue. ++ * There are 2 output queue's, if_fastq and if_batchq. ++ * Each output queue is a doubly linked list of double linked lists ++ * of mbufs, each list belonging to one "session" (socket). This ++ * way, we can output packets fairly by sending one packet from each ++ * session, instead of all the packets from one session, then all packets ++ * from the next session, etc. Packets on the if_fastq get absolute ++ * priority, but if one session hogs the link, it gets "downgraded" ++ * to the batchq until it runs out of packets, then it'll return ++ * to the fastq (eg. 
if the user does an ls -alR in a telnet session, ++ * it'll temporarily get downgraded to the batchq) ++ */ ++void if_output(struct socket *so, struct mbuf *ifm) ++{ ++ Slirp *slirp = ifm->slirp; ++ M_DUP_DEBUG(slirp, ifm, 0, 0); ++ ++ struct mbuf *ifq; ++ int on_fastq = 1; ++ ++ DEBUG_CALL("if_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("ifm = %p", ifm); ++ ++ /* ++ * First remove the mbuf from m_usedlist, ++ * since we're gonna use m_next and m_prev ourselves ++ * XXX Shouldn't need this, gotta change dtom() etc. ++ */ ++ if (ifm->m_flags & M_USEDLIST) { ++ remque(ifm); ++ ifm->m_flags &= ~M_USEDLIST; ++ } ++ ++ /* ++ * See if there's already a batchq list for this session. ++ * This can include an interactive session, which should go on fastq, ++ * but gets too greedy... hence it'll be downgraded from fastq to batchq. ++ * We mustn't put this packet back on the fastq (or we'll send it out of ++ * order) ++ * XXX add cache here? ++ */ ++ if (so) { ++ for (ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; ++ (struct quehead *)ifq != &slirp->if_batchq; ifq = ifq->ifq_prev) { ++ if (so == ifq->ifq_so) { ++ /* A match! */ ++ ifm->ifq_so = so; ++ ifs_insque(ifm, ifq->ifs_prev); ++ goto diddit; ++ } ++ } ++ } ++ ++ /* No match, check which queue to put it on */ ++ if (so && (so->so_iptos & IPTOS_LOWDELAY)) { ++ ifq = (struct mbuf *)slirp->if_fastq.qh_rlink; ++ on_fastq = 1; ++ /* ++ * Check if this packet is a part of the last ++ * packet's session ++ */ ++ if (ifq->ifq_so == so) { ++ ifm->ifq_so = so; ++ ifs_insque(ifm, ifq->ifs_prev); ++ goto diddit; ++ } ++ } else { ++ ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; ++ } ++ ++ /* Create a new doubly linked list for this session */ ++ ifm->ifq_so = so; ++ ifs_init(ifm); ++ insque(ifm, ifq); ++ ++diddit: ++ if (so) { ++ /* Update *_queued */ ++ so->so_queued++; ++ so->so_nqueued++; ++ /* ++ * Check if the interactive session should be downgraded to ++ * the batchq. 
A session is downgraded if it has queued 6 ++ * packets without pausing, and at least 3 of those packets ++ * have been sent over the link ++ * (XXX These are arbitrary numbers, probably not optimal..) ++ */ ++ if (on_fastq && ++ ((so->so_nqueued >= 6) && (so->so_nqueued - so->so_queued) >= 3)) { ++ /* Remove from current queue... */ ++ remque(ifm->ifs_next); ++ ++ /* ...And insert in the new. That'll teach ya! */ ++ insque(ifm->ifs_next, &slirp->if_batchq); ++ } ++ } ++ ++ /* ++ * This prevents us from malloc()ing too many mbufs ++ */ ++ if_start(ifm->slirp); ++} ++ ++/* ++ * Send one packet from each session. ++ * If there are packets on the fastq, they are sent FIFO, before ++ * everything else. Then we choose the first packet from each ++ * batchq session (socket) and send it. ++ * For example, if there are 3 ftp sessions fighting for bandwidth, ++ * one packet will be sent from the first session, then one packet ++ * from the second session, then one packet from the third. ++ */ ++void if_start(Slirp *slirp) ++{ ++ uint64_t now = slirp->cb->clock_get_ns(slirp->opaque); ++ bool from_batchq = false; ++ struct mbuf *ifm, *ifm_next, *ifqt; ++ ++ DEBUG_VERBOSE_CALL("if_start"); ++ ++ if (slirp->if_start_busy) { ++ return; ++ } ++ slirp->if_start_busy = true; ++ ++ struct mbuf *batch_head = NULL; ++ if (slirp->if_batchq.qh_link != &slirp->if_batchq) { ++ batch_head = (struct mbuf *)slirp->if_batchq.qh_link; ++ } ++ ++ if (slirp->if_fastq.qh_link != &slirp->if_fastq) { ++ ifm_next = (struct mbuf *)slirp->if_fastq.qh_link; ++ } else if (batch_head) { ++ /* Nothing on fastq, pick up from batchq */ ++ ifm_next = batch_head; ++ from_batchq = true; ++ } else { ++ ifm_next = NULL; ++ } ++ ++ while (ifm_next) { ++ ifm = ifm_next; ++ ++ ifm_next = ifm->ifq_next; ++ if ((struct quehead *)ifm_next == &slirp->if_fastq) { ++ /* No more packets in fastq, switch to batchq */ ++ ifm_next = batch_head; ++ from_batchq = true; ++ } ++ if ((struct quehead *)ifm_next == 
&slirp->if_batchq) { ++ /* end of batchq */ ++ ifm_next = NULL; ++ } ++ ++ /* Try to send packet unless it already expired */ ++ if (ifm->expiration_date >= now && !if_encap(slirp, ifm)) { ++ /* Packet is delayed due to pending ARP or NDP resolution */ ++ continue; ++ } ++ ++ /* Remove it from the queue */ ++ ifqt = ifm->ifq_prev; ++ remque(ifm); ++ ++ /* If there are more packets for this session, re-queue them */ ++ if (ifm->ifs_next != ifm) { ++ struct mbuf *next = ifm->ifs_next; ++ ++ insque(next, ifqt); ++ ifs_remque(ifm); ++ if (!from_batchq) { ++ ifm_next = next; ++ } ++ } ++ ++ /* Update so_queued */ ++ if (ifm->ifq_so && --ifm->ifq_so->so_queued == 0) { ++ /* If there's no more queued, reset nqueued */ ++ ifm->ifq_so->so_nqueued = 0; ++ } ++ ++ m_free(ifm); ++ } ++ ++ slirp->if_start_busy = false; ++} +diff --git a/slirp/src/if.h b/slirp/src/if.h +new file mode 100644 +index 0000000000..7cf9d2750e +--- /dev/null ++++ b/slirp/src/if.h +@@ -0,0 +1,25 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef IF_H ++#define IF_H ++ ++#define IF_COMPRESS 0x01 /* We want compression */ ++#define IF_NOCOMPRESS 0x02 /* Do not do compression */ ++#define IF_AUTOCOMP 0x04 /* Autodetect (default) */ ++#define IF_NOCIDCOMP 0x08 /* CID compression */ ++ ++#define IF_MTU_DEFAULT 1500 ++#define IF_MTU_MIN 68 ++#define IF_MTU_MAX 65521 ++#define IF_MRU_DEFAULT 1500 ++#define IF_MRU_MIN 68 ++#define IF_MRU_MAX 65521 ++#define IF_COMP IF_AUTOCOMP /* Flags for compression */ ++ ++/* 2 for alignment, 14 for ethernet */ ++#define IF_MAXLINKHDR (2 + ETH_HLEN) ++ ++#endif +diff --git a/slirp/src/ip.h b/slirp/src/ip.h +new file mode 100644 +index 0000000000..e5d4aa8a6d +--- /dev/null ++++ b/slirp/src/ip.h +@@ -0,0 +1,242 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. 
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. 
++ * ++ * @(#)ip.h 8.1 (Berkeley) 6/10/93 ++ * ip.h,v 1.3 1994/08/21 05:27:30 paul Exp ++ */ ++ ++#ifndef IP_H ++#define IP_H ++ ++#include <glib.h> ++ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++#undef NTOHL ++#undef NTOHS ++#undef HTONL ++#undef HTONS ++#define NTOHL(d) ++#define NTOHS(d) ++#define HTONL(d) ++#define HTONS(d) ++#else ++#ifndef NTOHL ++#define NTOHL(d) ((d) = ntohl((d))) ++#endif ++#ifndef NTOHS ++#define NTOHS(d) ((d) = ntohs((uint16_t)(d))) ++#endif ++#ifndef HTONL ++#define HTONL(d) ((d) = htonl((d))) ++#endif ++#ifndef HTONS ++#define HTONS(d) ((d) = htons((uint16_t)(d))) ++#endif ++#endif ++ ++typedef uint32_t n_long; /* long as received from the net */ ++ ++/* ++ * Definitions for internet protocol version 4. ++ * Per RFC 791, September 1981. ++ */ ++#define IPVERSION 4 ++ ++/* ++ * Structure of an internet header, naked of options. ++ */ ++struct ip { ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t ip_v : 4, /* version */ ++ ip_hl : 4; /* header length */ ++#else ++ uint8_t ip_hl : 4, /* header length */ ++ ip_v : 4; /* version */ ++#endif ++ uint8_t ip_tos; /* type of service */ ++ uint16_t ip_len; /* total length */ ++ uint16_t ip_id; /* identification */ ++ uint16_t ip_off; /* fragment offset field */ ++#define IP_DF 0x4000 /* don't fragment flag */ ++#define IP_MF 0x2000 /* more fragments flag */ ++#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ ++ uint8_t ip_ttl; /* time to live */ ++ uint8_t ip_p; /* protocol */ ++ uint16_t ip_sum; /* checksum */ ++ struct in_addr ip_src, ip_dst; /* source and dest address */ ++} SLIRP_PACKED; ++ ++#define IP_MAXPACKET 65535 /* maximum packet size */ ++ ++/* ++ * Definitions for IP type of service (ip_tos) ++ */ ++#define IPTOS_LOWDELAY 0x10 ++#define IPTOS_THROUGHPUT 0x08 ++#define IPTOS_RELIABILITY 0x04 ++ ++/* ++ * Definitions for options.
++ */ ++#define IPOPT_COPIED(o) ((o)&0x80) ++#define IPOPT_CLASS(o) ((o)&0x60) ++#define IPOPT_NUMBER(o) ((o)&0x1f) ++ ++#define IPOPT_CONTROL 0x00 ++#define IPOPT_RESERVED1 0x20 ++#define IPOPT_DEBMEAS 0x40 ++#define IPOPT_RESERVED2 0x60 ++ ++#define IPOPT_EOL 0 /* end of option list */ ++#define IPOPT_NOP 1 /* no operation */ ++ ++#define IPOPT_RR 7 /* record packet route */ ++#define IPOPT_TS 68 /* timestamp */ ++#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */ ++#define IPOPT_LSRR 131 /* loose source route */ ++#define IPOPT_SATID 136 /* satnet id */ ++#define IPOPT_SSRR 137 /* strict source route */ ++ ++/* ++ * Offsets to fields in options other than EOL and NOP. ++ */ ++#define IPOPT_OPTVAL 0 /* option ID */ ++#define IPOPT_OLEN 1 /* option length */ ++#define IPOPT_OFFSET 2 /* offset within option */ ++#define IPOPT_MINOFF 4 /* min value of above */ ++ ++/* ++ * Time stamp option structure. ++ */ ++struct ip_timestamp { ++ uint8_t ipt_code; /* IPOPT_TS */ ++ uint8_t ipt_len; /* size of structure (variable) */ ++ uint8_t ipt_ptr; /* index of current entry */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t ipt_oflw : 4, /* overflow counter */ ++ ipt_flg : 4; /* flags, see below */ ++#else ++ uint8_t ipt_flg : 4, /* flags, see below */ ++ ipt_oflw : 4; /* overflow counter */ ++#endif ++ union ipt_timestamp { ++ n_long ipt_time[1]; ++ struct ipt_ta { ++ struct in_addr ipt_addr; ++ n_long ipt_time; ++ } ipt_ta[1]; ++ } ipt_timestamp; ++} SLIRP_PACKED; ++ ++/* flag bits for ipt_flg */ ++#define IPOPT_TS_TSONLY 0 /* timestamps only */ ++#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ ++#define IPOPT_TS_PRESPEC 3 /* specified modules only */ ++ ++/* bits for security (not byte swapped) */ ++#define IPOPT_SECUR_UNCLASS 0x0000 ++#define IPOPT_SECUR_CONFID 0xf135 ++#define IPOPT_SECUR_EFTO 0x789a ++#define IPOPT_SECUR_MMMM 0xbc4d ++#define IPOPT_SECUR_RESTR 0xaf13 ++#define IPOPT_SECUR_SECRET 0xd788 ++#define IPOPT_SECUR_TOPSECRET 0x6bc5 ++ ++/* ++ * 
Internet implementation parameters. ++ */ ++#define MAXTTL 255 /* maximum time to live (seconds) */ ++#define IPDEFTTL 64 /* default ttl, from RFC 1340 */ ++#define IPFRAGTTL 60 /* time to live for frags, slowhz */ ++#define IPTTLDEC 1 /* subtracted when forwarding */ ++ ++#define IP_MSS 576 /* default maximum segment size */ ++ ++#if GLIB_SIZEOF_VOID_P == 4 ++struct mbuf_ptr { ++ struct mbuf *mptr; ++ uint32_t dummy; ++} SLIRP_PACKED; ++#else ++struct mbuf_ptr { ++ struct mbuf *mptr; ++} SLIRP_PACKED; ++#endif ++struct qlink { ++ void *next, *prev; ++}; ++ ++/* ++ * Overlay for ip header used by other protocols (tcp, udp). ++ */ ++struct ipovly { ++ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ ++ uint8_t ih_x1; /* (unused) */ ++ uint8_t ih_pr; /* protocol */ ++ uint16_t ih_len; /* protocol length */ ++ struct in_addr ih_src; /* source internet address */ ++ struct in_addr ih_dst; /* destination internet address */ ++} SLIRP_PACKED; ++ ++/* ++ * Ip reassembly queue structure. Each fragment ++ * being reassembled is attached to one of these structures. ++ * They are timed out after ipq_ttl drops to 0, and may also ++ * be reclaimed if memory becomes tight. ++ * size 28 bytes ++ */ ++struct ipq { ++ struct qlink frag_link; /* to ip headers of fragments */ ++ struct qlink ip_link; /* to other reass headers */ ++ uint8_t ipq_ttl; /* time for reass q to live */ ++ uint8_t ipq_p; /* protocol of this fragment */ ++ uint16_t ipq_id; /* sequence id for reassembly */ ++ struct in_addr ipq_src, ipq_dst; ++}; ++ ++/* ++ * Ip header, when holding a fragment. 
++ * ++ * Note: ipf_link must be at same offset as frag_link above ++ */ ++struct ipasfrag { ++ struct qlink ipf_link; ++ struct ip ipf_ip; ++}; ++ ++G_STATIC_ASSERT(offsetof(struct ipq, frag_link) == ++ offsetof(struct ipasfrag, ipf_link)); ++ ++#define ipf_off ipf_ip.ip_off ++#define ipf_tos ipf_ip.ip_tos ++#define ipf_len ipf_ip.ip_len ++#define ipf_next ipf_link.next ++#define ipf_prev ipf_link.prev ++ ++#endif +diff --git a/slirp/src/ip6.h b/slirp/src/ip6.h +new file mode 100644 +index 0000000000..0630309d29 +--- /dev/null ++++ b/slirp/src/ip6.h +@@ -0,0 +1,214 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#ifndef SLIRP_IP6_H ++#define SLIRP_IP6_H ++ ++#include <glib.h> ++#include <string.h> ++ ++#define ALLNODES_MULTICAST \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01 \ ++ } \ ++ } ++ ++#define SOLICITED_NODE_PREFIX \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01, \ ++ 0xff, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00 \ ++ } \ ++ } ++ ++#define LINKLOCAL_ADDR \ ++ { \ ++ .s6_addr = { \ ++ 0xfe, \ ++ 0x80, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x02 \ ++ } \ ++ } ++ ++#define ZERO_ADDR \ ++ { \ ++ .s6_addr = { \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00 \ ++ } \ ++ } ++ ++static inline bool in6_equal(const struct in6_addr *a, const struct in6_addr *b) ++{ ++ return memcmp(a, b, sizeof(*a)) == 0; ++} ++ ++static inline bool in6_equal_net(const struct in6_addr *a, ++ const struct in6_addr *b, int
prefix_len) ++{ ++ if (memcmp(a, b, prefix_len / 8) != 0) { ++ return 0; ++ } ++ ++ if (prefix_len % 8 == 0) { ++ return 1; ++ } ++ ++ return a->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)) == ++ b->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)); ++} ++ ++static inline bool in6_equal_mach(const struct in6_addr *a, ++ const struct in6_addr *b, int prefix_len) ++{ ++ if (memcmp(&(a->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), ++ &(b->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), ++ 16 - DIV_ROUND_UP(prefix_len, 8)) != 0) { ++ return 0; ++ } ++ ++ if (prefix_len % 8 == 0) { ++ return 1; ++ } ++ ++ return (a->s6_addr[prefix_len / 8] & ++ ((1U << (8 - (prefix_len % 8))) - 1)) == ++ (b->s6_addr[prefix_len / 8] & ((1U << (8 - (prefix_len % 8))) - 1)); ++} ++ ++ ++#define in6_equal_router(a) \ ++ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ ++ in6_equal_mach(a, &slirp->vhost_addr6, slirp->vprefix_len)) || \ ++ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ ++ in6_equal_mach(a, &slirp->vhost_addr6, 64))) ++ ++#define in6_equal_dns(a) \ ++ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ ++ in6_equal_mach(a, &slirp->vnameserver_addr6, slirp->vprefix_len)) || \ ++ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ ++ in6_equal_mach(a, &slirp->vnameserver_addr6, 64))) ++ ++#define in6_equal_host(a) (in6_equal_router(a) || in6_equal_dns(a)) ++ ++#define in6_solicitednode_multicast(a) \ ++ (in6_equal_net(a, &(struct in6_addr)SOLICITED_NODE_PREFIX, 104)) ++ ++#define in6_zero(a) (in6_equal(a, &(struct in6_addr)ZERO_ADDR)) ++ ++/* Compute emulated host MAC address from its ipv6 address */ ++static inline void in6_compute_ethaddr(struct in6_addr ip, ++ uint8_t eth[ETH_ALEN]) ++{ ++ eth[0] = 0x52; ++ eth[1] = 0x56; ++ memcpy(&eth[2], &ip.s6_addr[16 - (ETH_ALEN - 2)], ETH_ALEN - 2); ++} ++ ++/* ++ * Definitions for internet protocol version 6. ++ * Per RFC 2460, December 1998.
++ */ ++#define IP6VERSION 6 ++#define IP6_HOP_LIMIT 255 ++ ++/* ++ * Structure of an internet header, naked of options. ++ */ ++struct ip6 { ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint32_t ip_v : 4, /* version */ ++ ip_tc_hi : 4, /* traffic class */ ++ ip_tc_lo : 4, ip_fl_hi : 4, /* flow label */ ++ ip_fl_lo : 16; ++#else ++ uint32_t ip_tc_hi : 4, ip_v : 4, ip_fl_hi : 4, ip_tc_lo : 4, ip_fl_lo : 16; ++#endif ++ uint16_t ip_pl; /* payload length */ ++ uint8_t ip_nh; /* next header */ ++ uint8_t ip_hl; /* hop limit */ ++ struct in6_addr ip_src, ip_dst; /* source and dest address */ ++}; ++ ++/* ++ * IPv6 pseudo-header used by upper-layer protocols ++ */ ++struct ip6_pseudohdr { ++ struct in6_addr ih_src; /* source internet address */ ++ struct in6_addr ih_dst; /* destination internet address */ ++ uint32_t ih_pl; /* upper-layer packet length */ ++ uint16_t ih_zero_hi; /* zero */ ++ uint8_t ih_zero_lo; /* zero */ ++ uint8_t ih_nh; /* next header */ ++}; ++ ++/* ++ * We don't want to mark these ip6 structs as packed as they are naturally ++ * correctly aligned; instead assert that there is no stray padding. ++ * If we marked the struct as packed then we would be unable to take ++ * the address of any of the fields in it. ++ */ ++G_STATIC_ASSERT(sizeof(struct ip6) == 40); ++G_STATIC_ASSERT(sizeof(struct ip6_pseudohdr) == 40); ++ ++#endif +diff --git a/slirp/src/ip6_icmp.c b/slirp/src/ip6_icmp.c +new file mode 100644 +index 0000000000..738b40f725 +--- /dev/null ++++ b/slirp/src/ip6_icmp.c +@@ -0,0 +1,444 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. 
++ */ ++ ++#include "slirp.h" ++#include "ip6_icmp.h" ++ ++#define NDP_Interval \ ++ g_rand_int_range(slirp->grand, NDP_MinRtrAdvInterval, NDP_MaxRtrAdvInterval) ++ ++static void ra_timer_handler(void *opaque) ++{ ++ Slirp *slirp = opaque; ++ ++ slirp->cb->timer_mod(slirp->ra_timer, ++ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + ++ NDP_Interval, ++ slirp->opaque); ++ ndp_send_ra(slirp); ++} ++ ++void icmp6_init(Slirp *slirp) ++{ ++ if (!slirp->in6_enabled) { ++ return; ++ } ++ ++ slirp->ra_timer = ++ slirp->cb->timer_new(ra_timer_handler, slirp, slirp->opaque); ++ slirp->cb->timer_mod(slirp->ra_timer, ++ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + ++ NDP_Interval, ++ slirp->opaque); ++} ++ ++void icmp6_cleanup(Slirp *slirp) ++{ ++ if (!slirp->in6_enabled) { ++ return; ++ } ++ ++ slirp->cb->timer_free(slirp->ra_timer, slirp->opaque); ++} ++ ++static void icmp6_send_echoreply(struct mbuf *m, Slirp *slirp, struct ip6 *ip, ++ struct icmp6 *icmp) ++{ ++ struct mbuf *t = m_get(slirp); ++ t->m_len = sizeof(struct ip6) + ntohs(ip->ip_pl); ++ memcpy(t->m_data, m->m_data, t->m_len); ++ ++ /* IPv6 Packet */ ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_dst = ip->ip_src; ++ rip->ip_src = ip->ip_dst; ++ ++ /* ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_ECHO_REPLY; ++ ricmp->icmp6_cksum = 0; ++ ++ /* Checksum */ ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++void icmp6_forward_error(struct mbuf *m, uint8_t type, uint8_t code, struct in6_addr *src) ++{ ++ Slirp *slirp = m->slirp; ++ struct mbuf *t; ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ char addrstr[INET6_ADDRSTRLEN]; ++ ++ DEBUG_CALL("icmp6_send_error"); ++ DEBUG_ARG("type = %d, code = %d", type, code); ++ ++ if (IN6_IS_ADDR_MULTICAST(&ip->ip_src) || in6_zero(&ip->ip_src)) { ++ /* TODO icmp error? 
*/ ++ return; ++ } ++ ++ t = m_get(slirp); ++ ++ /* IPv6 packet */ ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = *src; ++ rip->ip_dst = ip->ip_src; ++ inet_ntop(AF_INET6, &rip->ip_dst, addrstr, INET6_ADDRSTRLEN); ++ DEBUG_ARG("target = %s", addrstr); ++ ++ rip->ip_nh = IPPROTO_ICMPV6; ++ const int error_data_len = MIN( ++ m->m_len, slirp->if_mtu - (sizeof(struct ip6) + ICMP6_ERROR_MINLEN)); ++ rip->ip_pl = htons(ICMP6_ERROR_MINLEN + error_data_len); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = type; ++ ricmp->icmp6_code = code; ++ ricmp->icmp6_cksum = 0; ++ ++ switch (type) { ++ case ICMP6_UNREACH: ++ case ICMP6_TIMXCEED: ++ ricmp->icmp6_err.unused = 0; ++ break; ++ case ICMP6_TOOBIG: ++ ricmp->icmp6_err.mtu = htonl(slirp->if_mtu); ++ break; ++ case ICMP6_PARAMPROB: ++ /* TODO: Handle this case */ ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ t->m_data += ICMP6_ERROR_MINLEN; ++ memcpy(t->m_data, m->m_data, error_data_len); ++ ++ /* Checksum */ ++ t->m_data -= ICMP6_ERROR_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code) ++{ ++ struct in6_addr src = LINKLOCAL_ADDR; ++ icmp6_forward_error(m, type, code, &src); ++} ++ ++/* ++ * Send NDP Router Advertisement ++ */ ++void ndp_send_ra(Slirp *slirp) ++{ ++ DEBUG_CALL("ndp_send_ra"); ++ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ size_t pl_size = 0; ++ struct in6_addr addr; ++ uint32_t scope_id; ++ ++ rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; ++ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; ++ rip->ip_nh = IPPROTO_ICMPV6; ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ 
ricmp->icmp6_type = ICMP6_NDP_RA; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nra.chl = NDP_AdvCurHopLimit; ++ ricmp->icmp6_nra.M = NDP_AdvManagedFlag; ++ ricmp->icmp6_nra.O = NDP_AdvOtherConfigFlag; ++ ricmp->icmp6_nra.reserved = 0; ++ ricmp->icmp6_nra.lifetime = htons(NDP_AdvDefaultLifetime); ++ ricmp->icmp6_nra.reach_time = htonl(NDP_AdvReachableTime); ++ ricmp->icmp6_nra.retrans_time = htonl(NDP_AdvRetransTime); ++ t->m_data += ICMP6_NDP_RA_MINLEN; ++ pl_size += ICMP6_NDP_RA_MINLEN; ++ ++ /* Source link-layer address (NDP option) */ ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(rip->ip_src, opt->ndpopt_linklayer); ++ t->m_data += NDPOPT_LINKLAYER_LEN; ++ pl_size += NDPOPT_LINKLAYER_LEN; ++ ++ /* Prefix information (NDP option) */ ++ struct ndpopt *opt2 = mtod(t, struct ndpopt *); ++ opt2->ndpopt_type = NDPOPT_PREFIX_INFO; ++ opt2->ndpopt_len = NDPOPT_PREFIXINFO_LEN / 8; ++ opt2->ndpopt_prefixinfo.prefix_length = slirp->vprefix_len; ++ opt2->ndpopt_prefixinfo.L = 1; ++ opt2->ndpopt_prefixinfo.A = 1; ++ opt2->ndpopt_prefixinfo.reserved1 = 0; ++ opt2->ndpopt_prefixinfo.valid_lt = htonl(NDP_AdvValidLifetime); ++ opt2->ndpopt_prefixinfo.pref_lt = htonl(NDP_AdvPrefLifetime); ++ opt2->ndpopt_prefixinfo.reserved2 = 0; ++ opt2->ndpopt_prefixinfo.prefix = slirp->vprefix_addr6; ++ t->m_data += NDPOPT_PREFIXINFO_LEN; ++ pl_size += NDPOPT_PREFIXINFO_LEN; ++ ++ /* Recursive DNS server (NDP option) */ ++ if (get_dns6_addr(&addr, &scope_id) >= 0) { ++ /* Host system does have an IPv6 DNS server, announce our proxy.
*/ ++ struct ndpopt *opt3 = mtod(t, struct ndpopt *); ++ opt3->ndpopt_type = NDPOPT_RDNSS; ++ opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8; ++ opt3->ndpopt_rdnss.reserved = 0; ++ opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval); ++ opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6; ++ t->m_data += NDPOPT_RDNSS_LEN; ++ pl_size += NDPOPT_RDNSS_LEN; ++ } ++ ++ rip->ip_pl = htons(pl_size); ++ t->m_data -= sizeof(struct ip6) + pl_size; ++ t->m_len = sizeof(struct ip6) + pl_size; ++ ++ /* ICMPv6 Checksum */ ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Send NDP Neighbor Solitication ++ */ ++void ndp_send_ns(Slirp *slirp, struct in6_addr addr) ++{ ++ char addrstr[INET6_ADDRSTRLEN]; ++ ++ inet_ntop(AF_INET6, &addr, addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_send_ns"); ++ DEBUG_ARG("target = %s", addrstr); ++ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = slirp->vhost_addr6; ++ rip->ip_dst = (struct in6_addr)SOLICITED_NODE_PREFIX; ++ memcpy(&rip->ip_dst.s6_addr[13], &addr.s6_addr[13], 3); ++ rip->ip_nh = IPPROTO_ICMPV6; ++ rip->ip_pl = htons(ICMP6_NDP_NS_MINLEN + NDPOPT_LINKLAYER_LEN); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_NS; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nns.reserved = 0; ++ ricmp->icmp6_nns.target = addr; ++ ++ /* Build NDP option */ ++ t->m_data += ICMP6_NDP_NS_MINLEN; ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(slirp->vhost_addr6, opt->ndpopt_linklayer); ++ ++ /* ICMPv6 Checksum */ ++ t->m_data -= ICMP6_NDP_NA_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ 
ip6_output(NULL, t, 1); ++} ++ ++/* ++ * Send NDP Neighbor Advertisement ++ */ ++static void ndp_send_na(Slirp *slirp, struct ip6 *ip, struct icmp6 *icmp) ++{ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = icmp->icmp6_nns.target; ++ if (in6_zero(&ip->ip_src)) { ++ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; ++ } else { ++ rip->ip_dst = ip->ip_src; ++ } ++ rip->ip_nh = IPPROTO_ICMPV6; ++ rip->ip_pl = htons(ICMP6_NDP_NA_MINLEN + NDPOPT_LINKLAYER_LEN); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_NA; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nna.R = NDP_IsRouter; ++ ricmp->icmp6_nna.S = !IN6_IS_ADDR_MULTICAST(&rip->ip_dst); ++ ricmp->icmp6_nna.O = 1; ++ ricmp->icmp6_nna.reserved_hi = 0; ++ ricmp->icmp6_nna.reserved_lo = 0; ++ ricmp->icmp6_nna.target = icmp->icmp6_nns.target; ++ ++ /* Build NDP option */ ++ t->m_data += ICMP6_NDP_NA_MINLEN; ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_TARGET; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(ricmp->icmp6_nna.target, opt->ndpopt_linklayer); ++ ++ /* ICMPv6 Checksum */ ++ t->m_data -= ICMP6_NDP_NA_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Process a NDP message ++ */ ++static void ndp_input(struct mbuf *m, Slirp *slirp, struct ip6 *ip, ++ struct icmp6 *icmp) ++{ ++ g_assert(M_ROOMBEFORE(m) >= ETH_HLEN); ++ ++ m->m_len += ETH_HLEN; ++ m->m_data -= ETH_HLEN; ++ struct ethhdr *eth = mtod(m, struct ethhdr *); ++ m->m_len -= ETH_HLEN; ++ m->m_data += ETH_HLEN; ++ ++ switch (icmp->icmp6_type) { ++ case ICMP6_NDP_RS: ++ DEBUG_CALL(" type = Router Solicitation"); ++ if (ip->ip_hl == 255 && 
icmp->icmp6_code == 0 && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_RS_MINLEN) { ++ /* Gratuitous NDP */ ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ ++ ndp_send_ra(slirp); ++ } ++ break; ++ ++ case ICMP6_NDP_RA: ++ DEBUG_CALL(" type = Router Advertisement"); ++ slirp->cb->guest_error("Warning: guest sent NDP RA, but shouldn't", ++ slirp->opaque); ++ break; ++ ++ case ICMP6_NDP_NS: ++ DEBUG_CALL(" type = Neighbor Solicitation"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nns.target) && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_NS_MINLEN && ++ (!in6_zero(&ip->ip_src) || ++ in6_solicitednode_multicast(&ip->ip_dst))) { ++ if (in6_equal_host(&icmp->icmp6_nns.target)) { ++ /* Gratuitous NDP */ ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ ndp_send_na(slirp, ip, icmp); ++ } ++ } ++ break; ++ ++ case ICMP6_NDP_NA: ++ DEBUG_CALL(" type = Neighbor Advertisement"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_NA_MINLEN && ++ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nna.target) && ++ (!IN6_IS_ADDR_MULTICAST(&ip->ip_dst) || icmp->icmp6_nna.S == 0)) { ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ } ++ break; ++ ++ case ICMP6_NDP_REDIRECT: ++ DEBUG_CALL(" type = Redirect"); ++ slirp->cb->guest_error( ++ "Warning: guest sent NDP REDIRECT, but shouldn't", slirp->opaque); ++ break; ++ } ++} ++ ++/* ++ * Process a received ICMPv6 message. 
++ */ ++void icmp6_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ /* NDP reads the ethernet header for gratuitous NDP */ ++ M_DUP_DEBUG(slirp, m, 1, ETH_HLEN); ++ ++ struct icmp6 *icmp; ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ int hlen = sizeof(struct ip6); ++ ++ DEBUG_CALL("icmp6_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (ntohs(ip->ip_pl) < ICMP6_MINLEN) { ++ goto end; ++ } ++ ++ if (ip6_cksum(m)) { ++ goto end; ++ } ++ ++ m->m_len -= hlen; ++ m->m_data += hlen; ++ icmp = mtod(m, struct icmp6 *); ++ m->m_len += hlen; ++ m->m_data -= hlen; ++ ++ DEBUG_ARG("icmp6_type = %d", icmp->icmp6_type); ++ switch (icmp->icmp6_type) { ++ case ICMP6_ECHO_REQUEST: ++ if (in6_equal_host(&ip->ip_dst)) { ++ icmp6_send_echoreply(m, slirp, ip, icmp); ++ } else { ++ /* TODO */ ++ g_critical("external icmpv6 not supported yet"); ++ } ++ break; ++ ++ case ICMP6_NDP_RS: ++ case ICMP6_NDP_RA: ++ case ICMP6_NDP_NS: ++ case ICMP6_NDP_NA: ++ case ICMP6_NDP_REDIRECT: ++ ndp_input(m, slirp, ip, icmp); ++ break; ++ ++ case ICMP6_UNREACH: ++ case ICMP6_TOOBIG: ++ case ICMP6_TIMXCEED: ++ case ICMP6_PARAMPROB: ++ /* XXX? report error? close socket? */ ++ default: ++ break; ++ } ++ ++end: ++ m_free(m); ++} +diff --git a/slirp/src/ip6_icmp.h b/slirp/src/ip6_icmp.h +new file mode 100644 +index 0000000000..9070999cfc +--- /dev/null ++++ b/slirp/src/ip6_icmp.h +@@ -0,0 +1,220 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#ifndef SLIRP_IP6_ICMP_H ++#define SLIRP_IP6_ICMP_H ++ ++/* ++ * Interface Control Message Protocol version 6 Definitions. ++ * Per RFC 4443, March 2006. ++ * ++ * Network Discover Protocol Definitions. ++ * Per RFC 4861, September 2007. 
++ */ ++ ++struct icmp6_echo { /* Echo Messages */ ++ uint16_t id; ++ uint16_t seq_num; ++}; ++ ++union icmp6_error_body { ++ uint32_t unused; ++ uint32_t pointer; ++ uint32_t mtu; ++}; ++ ++/* ++ * NDP Messages ++ */ ++struct ndp_rs { /* Router Solicitation Message */ ++ uint32_t reserved; ++}; ++ ++struct ndp_ra { /* Router Advertisement Message */ ++ uint8_t chl; /* Cur Hop Limit */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t M : 1, O : 1, reserved : 6; ++#else ++ uint8_t reserved : 6, O : 1, M : 1; ++#endif ++ uint16_t lifetime; /* Router Lifetime */ ++ uint32_t reach_time; /* Reachable Time */ ++ uint32_t retrans_time; /* Retrans Timer */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_ra) == 12); ++ ++struct ndp_ns { /* Neighbor Solicitation Message */ ++ uint32_t reserved; ++ struct in6_addr target; /* Target Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_ns) == 20); ++ ++struct ndp_na { /* Neighbor Advertisement Message */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint32_t R : 1, /* Router Flag */ ++ S : 1, /* Solicited Flag */ ++ O : 1, /* Override Flag */ ++ reserved_hi : 5, reserved_lo : 24; ++#else ++ uint32_t reserved_hi : 5, O : 1, S : 1, R : 1, reserved_lo : 24; ++#endif ++ struct in6_addr target; /* Target Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_na) == 20); ++ ++struct ndp_redirect { ++ uint32_t reserved; ++ struct in6_addr target; /* Target Address */ ++ struct in6_addr dest; /* Destination Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_redirect) == 36); ++ ++/* ++ * Structure of an icmpv6 header. 
++ */ ++struct icmp6 { ++ uint8_t icmp6_type; /* type of message, see below */ ++ uint8_t icmp6_code; /* type sub code */ ++ uint16_t icmp6_cksum; /* ones complement cksum of struct */ ++ union { ++ union icmp6_error_body error_body; ++ struct icmp6_echo echo; ++ struct ndp_rs ndp_rs; ++ struct ndp_ra ndp_ra; ++ struct ndp_ns ndp_ns; ++ struct ndp_na ndp_na; ++ struct ndp_redirect ndp_redirect; ++ } icmp6_body; ++#define icmp6_err icmp6_body.error_body ++#define icmp6_echo icmp6_body.echo ++#define icmp6_nrs icmp6_body.ndp_rs ++#define icmp6_nra icmp6_body.ndp_ra ++#define icmp6_nns icmp6_body.ndp_ns ++#define icmp6_nna icmp6_body.ndp_na ++#define icmp6_redirect icmp6_body.ndp_redirect ++}; ++ ++G_STATIC_ASSERT(sizeof(struct icmp6) == 40); ++ ++#define ICMP6_MINLEN 4 ++#define ICMP6_ERROR_MINLEN 8 ++#define ICMP6_ECHO_MINLEN 8 ++#define ICMP6_NDP_RS_MINLEN 8 ++#define ICMP6_NDP_RA_MINLEN 16 ++#define ICMP6_NDP_NS_MINLEN 24 ++#define ICMP6_NDP_NA_MINLEN 24 ++#define ICMP6_NDP_REDIRECT_MINLEN 40 ++ ++/* ++ * NDP Options ++ */ ++struct ndpopt { ++ uint8_t ndpopt_type; /* Option type */ ++ uint8_t ndpopt_len; /* /!\ In units of 8 octets */ ++ union { ++ unsigned char linklayer_addr[6]; /* Source/Target Link-layer */ ++#define ndpopt_linklayer ndpopt_body.linklayer_addr ++ struct prefixinfo { /* Prefix Information */ ++ uint8_t prefix_length; ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t L : 1, A : 1, reserved1 : 6; ++#else ++ uint8_t reserved1 : 6, A : 1, L : 1; ++#endif ++ uint32_t valid_lt; /* Valid Lifetime */ ++ uint32_t pref_lt; /* Preferred Lifetime */ ++ uint32_t reserved2; ++ struct in6_addr prefix; ++ } SLIRP_PACKED prefixinfo; ++#define ndpopt_prefixinfo ndpopt_body.prefixinfo ++ struct rdnss { ++ uint16_t reserved; ++ uint32_t lifetime; ++ struct in6_addr addr; ++ } SLIRP_PACKED rdnss; ++#define ndpopt_rdnss ndpopt_body.rdnss ++ } ndpopt_body; ++} SLIRP_PACKED; ++ ++/* NDP options type */ ++#define NDPOPT_LINKLAYER_SOURCE 1 /* Source Link-Layer Address */ 
++#define NDPOPT_LINKLAYER_TARGET 2 /* Target Link-Layer Address */ ++#define NDPOPT_PREFIX_INFO 3 /* Prefix Information */ ++#define NDPOPT_RDNSS 25 /* Recursive DNS Server Address */ ++ ++/* NDP options size, in octets. */ ++#define NDPOPT_LINKLAYER_LEN 8 ++#define NDPOPT_PREFIXINFO_LEN 32 ++#define NDPOPT_RDNSS_LEN 24 ++ ++/* ++ * Definition of type and code field values. ++ * Per https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xml ++ * Last Updated 2012-11-12 ++ */ ++ ++/* Errors */ ++#define ICMP6_UNREACH 1 /* Destination Unreachable */ ++#define ICMP6_UNREACH_NO_ROUTE 0 /* no route to dest */ ++#define ICMP6_UNREACH_DEST_PROHIB 1 /* com with dest prohibited */ ++#define ICMP6_UNREACH_SCOPE 2 /* beyond scope of src addr */ ++#define ICMP6_UNREACH_ADDRESS 3 /* address unreachable */ ++#define ICMP6_UNREACH_PORT 4 /* port unreachable */ ++#define ICMP6_UNREACH_SRC_FAIL 5 /* src addr failed */ ++#define ICMP6_UNREACH_REJECT_ROUTE 6 /* reject route to dest */ ++#define ICMP6_UNREACH_SRC_HDR_ERROR 7 /* error in src routing header */ ++#define ICMP6_TOOBIG 2 /* Packet Too Big */ ++#define ICMP6_TIMXCEED 3 /* Time Exceeded */ ++#define ICMP6_TIMXCEED_INTRANS 0 /* hop limit exceeded in transit */ ++#define ICMP6_TIMXCEED_REASS 1 /* ttl=0 in reass */ ++#define ICMP6_PARAMPROB 4 /* Parameter Problem */ ++#define ICMP6_PARAMPROB_HDR_FIELD 0 /* err header field */ ++#define ICMP6_PARAMPROB_NXTHDR_TYPE 1 /* unrecognized Next Header type */ ++#define ICMP6_PARAMPROB_IPV6_OPT 2 /* unrecognized IPv6 option */ ++ ++/* Informational Messages */ ++#define ICMP6_ECHO_REQUEST 128 /* Echo Request */ ++#define ICMP6_ECHO_REPLY 129 /* Echo Reply */ ++#define ICMP6_NDP_RS 133 /* Router Solicitation (NDP) */ ++#define ICMP6_NDP_RA 134 /* Router Advertisement (NDP) */ ++#define ICMP6_NDP_NS 135 /* Neighbor Solicitation (NDP) */ ++#define ICMP6_NDP_NA 136 /* Neighbor Advertisement (NDP) */ ++#define ICMP6_NDP_REDIRECT 137 /* Redirect Message (NDP) */ ++ ++/* ++ * 
Router Configuration Variables (rfc4861#section-6) ++ */ ++#define NDP_IsRouter 1 ++#define NDP_AdvSendAdvertisements 1 ++#define NDP_MaxRtrAdvInterval 600000 ++#define NDP_MinRtrAdvInterval \ ++ ((NDP_MaxRtrAdvInterval >= 9) ? NDP_MaxRtrAdvInterval / 3 : \ ++ NDP_MaxRtrAdvInterval) ++#define NDP_AdvManagedFlag 0 ++#define NDP_AdvOtherConfigFlag 0 ++#define NDP_AdvLinkMTU 0 ++#define NDP_AdvReachableTime 0 ++#define NDP_AdvRetransTime 0 ++#define NDP_AdvCurHopLimit 64 ++#define NDP_AdvDefaultLifetime ((3 * NDP_MaxRtrAdvInterval) / 1000) ++#define NDP_AdvValidLifetime 86400 ++#define NDP_AdvOnLinkFlag 1 ++#define NDP_AdvPrefLifetime 14400 ++#define NDP_AdvAutonomousFlag 1 ++ ++void icmp6_init(Slirp *slirp); ++void icmp6_cleanup(Slirp *slirp); ++void icmp6_input(struct mbuf *); ++void icmp6_forward_error(struct mbuf *m, uint8_t type, uint8_t code, struct in6_addr *src); ++void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code); ++void ndp_send_ra(Slirp *slirp); ++void ndp_send_ns(Slirp *slirp, struct in6_addr addr); ++ ++#endif +diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c +new file mode 100644 +index 0000000000..b3d98653df +--- /dev/null ++++ b/slirp/src/ip6_input.c +@@ -0,0 +1,88 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++#include "ip6_icmp.h" ++ ++/* ++ * IP initialization: fill in IP protocol switch table. ++ * All protocols not implemented in kernel go to raw IP protocol handler. 
++ */ ++void ip6_init(Slirp *slirp) ++{ ++ icmp6_init(slirp); ++} ++ ++void ip6_cleanup(Slirp *slirp) ++{ ++ icmp6_cleanup(slirp); ++} ++ ++void ip6_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ /* NDP reads the ethernet header for gratuitous NDP */ ++ M_DUP_DEBUG(slirp, m, 1, TCPIPHDR_DELTA + 2 + ETH_HLEN); ++ ++ struct ip6 *ip6; ++ ++ if (!slirp->in6_enabled) { ++ goto bad; ++ } ++ ++ DEBUG_CALL("ip6_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (m->m_len < sizeof(struct ip6)) { ++ goto bad; ++ } ++ ++ ip6 = mtod(m, struct ip6 *); ++ ++ if (ip6->ip_v != IP6VERSION) { ++ goto bad; ++ } ++ ++ if (ntohs(ip6->ip_pl) + sizeof(struct ip6) > slirp->if_mtu) { ++ icmp6_send_error(m, ICMP6_TOOBIG, 0); ++ goto bad; ++ } ++ ++ // Check if the message size is big enough to hold what's ++ // set in the payload length header. If not this is an invalid ++ // packet ++ if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { ++ goto bad; ++ } ++ ++ /* check ip_ttl for a correct ICMP reply */ ++ if (ip6->ip_hl == 0) { ++ icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); ++ goto bad; ++ } ++ ++ /* ++ * Switch out to protocol's input routine. ++ */ ++ switch (ip6->ip_nh) { ++ case IPPROTO_TCP: ++ NTOHS(ip6->ip_pl); ++ tcp_input(m, sizeof(struct ip6), (struct socket *)NULL, AF_INET6); ++ break; ++ case IPPROTO_UDP: ++ udp6_input(m); ++ break; ++ case IPPROTO_ICMPV6: ++ icmp6_input(m); ++ break; ++ default: ++ m_free(m); ++ } ++ return; ++bad: ++ m_free(m); ++} +diff --git a/slirp/src/ip6_output.c b/slirp/src/ip6_output.c +new file mode 100644 +index 0000000000..834f1c0a32 +--- /dev/null ++++ b/slirp/src/ip6_output.c +@@ -0,0 +1,45 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. 
++ */ ++ ++#include "slirp.h" ++ ++/* Number of packets queued before we start sending ++ * (to prevent allocing too many mbufs) */ ++#define IF6_THRESH 10 ++ ++/* ++ * IPv6 output. The packet in mbuf chain m contains a IP header ++ */ ++int ip6_output(struct socket *so, struct mbuf *m, int fast) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ ++ DEBUG_CALL("ip6_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ /* Fill IPv6 header */ ++ ip->ip_v = IP6VERSION; ++ ip->ip_hl = IP6_HOP_LIMIT; ++ ip->ip_tc_hi = 0; ++ ip->ip_tc_lo = 0; ++ ip->ip_fl_hi = 0; ++ ip->ip_fl_lo = 0; ++ ++ if (fast) { ++ /* We cannot fast-send non-multicast, we'd need a NDP NS */ ++ assert(IN6_IS_ADDR_MULTICAST(&ip->ip_dst)); ++ if_encap(m->slirp, m); ++ m_free(m); ++ } else { ++ if_output(so, m); ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/ip_icmp.c b/slirp/src/ip_icmp.c +new file mode 100644 +index 0000000000..9fba653a46 +--- /dev/null ++++ b/slirp/src/ip_icmp.c +@@ -0,0 +1,524 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 ++ * ip_icmp.c,v 1.7 1995/05/30 08:09:42 rgrimes Exp ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++#ifndef WITH_ICMP_ERROR_MSG ++#define WITH_ICMP_ERROR_MSG 0 ++#endif ++ ++/* The message sent when emulating PING */ ++/* Be nice and tell them it's just a pseudo-ping packet */ ++static const char icmp_ping_msg[] = ++ "This is a pseudo-PING packet used by Slirp to emulate ICMP ECHO-REQUEST " ++ "packets.\n"; ++ ++/* list of actions for icmp_send_error() on RX of an icmp message */ ++static const int icmp_flush[19] = { ++ /* ECHO REPLY (0) */ 0, ++ 1, ++ 1, ++ /* DEST UNREACH (3) */ 1, ++ /* SOURCE QUENCH (4)*/ 1, ++ /* REDIRECT (5) */ 1, ++ 1, ++ 1, ++ /* ECHO (8) */ 0, ++ /* ROUTERADVERT (9) */ 1, ++ /* ROUTERSOLICIT (10) */ 1, ++ /* TIME EXCEEDED (11) */ 1, ++ /* PARAMETER PROBLEM (12) */ 1, ++ /* TIMESTAMP (13) */ 0, ++ /* TIMESTAMP REPLY (14) */ 0, ++ /* INFO (15) */ 0, ++ /* INFO REPLY (16) */ 0, ++ /* ADDR MASK (17) */ 0, ++ /* ADDR MASK REPLY (18) */ 0 ++}; ++ ++void icmp_init(Slirp *slirp) ++{ ++ slirp->icmp.so_next = slirp->icmp.so_prev = &slirp->icmp; ++ slirp->icmp_last_so = &slirp->icmp; ++} ++ ++void 
icmp_cleanup(Slirp *slirp) ++{ ++ struct socket *so, *so_next; ++ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ so_next = so->so_next; ++ icmp_detach(so); ++ } ++} ++ ++static int icmp_send(struct socket *so, struct mbuf *m, int hlen) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ struct ip *ip = mtod(m, struct ip *); ++ struct sockaddr_in addr; ++ ++ so->s = slirp_socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); ++ if (so->s == -1) { ++ return -1; ++ } ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ ++ if (slirp_bind_outbound(so, AF_INET) != 0) { ++ // bind failed - close socket ++ closesocket(so->s); ++ so->s = -1; ++ return -1; ++ } ++ ++ so->so_m = m; ++ so->so_faddr = ip->ip_dst; ++ so->so_laddr = ip->ip_src; ++ so->so_iptos = ip->ip_tos; ++ so->so_type = IPPROTO_ICMP; ++ so->so_state = SS_ISFCONNECTED; ++ so->so_expire = curtime + SO_EXPIRE; ++ ++ addr.sin_family = AF_INET; ++ addr.sin_addr = so->so_faddr; ++ ++ insque(so, &so->slirp->icmp); ++ ++ if (sendto(so->s, m->m_data + hlen, m->m_len - hlen, 0, ++ (struct sockaddr *)&addr, sizeof(addr)) == -1) { ++ DEBUG_MISC("icmp_input icmp sendto tx errno = %d-%s", errno, ++ strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); ++ icmp_detach(so); ++ } ++ ++ return 0; ++} ++ ++void icmp_detach(struct socket *so) ++{ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++} ++ ++/* ++ * Process a received ICMP message. ++ */ ++void icmp_input(struct mbuf *m, int hlen) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ register struct icmp *icp; ++ register struct ip *ip = mtod(m, struct ip *); ++ int icmplen = ip->ip_len; ++ ++ DEBUG_CALL("icmp_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ /* ++ * Locate icmp structure in mbuf, and check ++ * that its not corrupted and of at least minimum length. 
++ */ ++ if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ ++ freeit: ++ m_free(m); ++ goto end_error; ++ } ++ ++ m->m_len -= hlen; ++ m->m_data += hlen; ++ icp = mtod(m, struct icmp *); ++ if (cksum(m, icmplen)) { ++ goto freeit; ++ } ++ m->m_len += hlen; ++ m->m_data -= hlen; ++ ++ DEBUG_ARG("icmp_type = %d", icp->icmp_type); ++ switch (icp->icmp_type) { ++ case ICMP_ECHO: ++ ip->ip_len += hlen; /* since ip_input subtracts this */ ++ if (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || ++ ip->ip_dst.s_addr == slirp->vnameserver_addr.s_addr) { ++ icmp_reflect(m); ++ } else if (slirp->restricted) { ++ goto freeit; ++ } else { ++ struct socket *so; ++ struct sockaddr_storage addr; ++ int ttl; ++ ++ so = socreate(slirp); ++ if (icmp_send(so, m, hlen) == 0) { ++ /* We could send this as ICMP, good! */ ++ return; ++ } ++ ++ /* We could not send this as ICMP, try to send it on UDP echo ++ * service (7), wishfully hoping that it is open there. */ ++ ++ if (udp_attach(so, AF_INET) == -1) { ++ DEBUG_MISC("icmp_input udp_attach errno = %d-%s", errno, ++ strerror(errno)); ++ sofree(so); ++ m_free(m); ++ goto end_error; ++ } ++ so->so_m = m; ++ so->so_ffamily = AF_INET; ++ so->so_faddr = ip->ip_dst; ++ so->so_fport = htons(7); ++ so->so_lfamily = AF_INET; ++ so->so_laddr = ip->ip_src; ++ so->so_lport = htons(9); ++ so->so_iptos = ip->ip_tos; ++ so->so_type = IPPROTO_ICMP; ++ so->so_state = SS_ISFCONNECTED; ++ ++ /* Send the packet */ ++ addr = so->fhost.ss; ++ if (sotranslate_out(so, &addr) < 0) { ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, ++ strerror(errno)); ++ udp_detach(so); ++ return; ++ } ++ ++ /* ++ * Check for TTL ++ */ ++ ttl = ip->ip_ttl-1; ++ if (ttl <= 0) { ++ DEBUG_MISC("udp ttl exceeded"); ++ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, ++ NULL); ++ udp_detach(so); ++ break; ++ } ++ setsockopt(so->s, IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)); ++ ++ if (sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), 0, ++ (struct sockaddr 
*)&addr, sockaddr_size(&addr)) == -1) { ++ DEBUG_MISC("icmp_input udp sendto tx errno = %d-%s", errno, ++ strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, ++ strerror(errno)); ++ udp_detach(so); ++ } ++ } /* if ip->ip_dst.s_addr == alias_addr.s_addr */ ++ break; ++ case ICMP_UNREACH: ++ /* XXX? report error? close socket? */ ++ case ICMP_TIMXCEED: ++ case ICMP_PARAMPROB: ++ case ICMP_SOURCEQUENCH: ++ case ICMP_TSTAMP: ++ case ICMP_MASKREQ: ++ case ICMP_REDIRECT: ++ m_free(m); ++ break; ++ ++ default: ++ m_free(m); ++ } /* switch */ ++ ++end_error: ++ /* m is m_free()'d xor put in a socket xor or given to ip_send */ ++ return; ++} ++ ++ ++/* ++ * Send an ICMP message in response to a situation ++ * ++ * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. ++ *MAY send more (we do). MUST NOT change this header information. MUST NOT reply ++ *to a multicast/broadcast IP address. MUST NOT reply to a multicast/broadcast ++ *MAC address. MUST reply to only the first fragment. ++ */ ++/* ++ * Send ICMP_UNREACH back to the source regarding msrc. ++ * mbuf *msrc is used as a template, but is NOT m_free()'d. ++ * It is reported as the bad ip packet. The header should ++ * be fully correct and in host byte order. ++ * ICMP fragmentation is illegal. All machines must accept 576 bytes in one ++ * packet. 
The maximum payload is 576-20(ip hdr)-8(icmp hdr)=548 ++ */ ++ ++#define ICMP_MAXDATALEN (IP_MSS - 28) ++void icmp_forward_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message, struct in_addr *src) ++{ ++ unsigned hlen, shlen, s_ip_len; ++ register struct ip *ip; ++ register struct icmp *icp; ++ register struct mbuf *m; ++ ++ DEBUG_CALL("icmp_send_error"); ++ DEBUG_ARG("msrc = %p", msrc); ++ DEBUG_ARG("msrc_len = %d", msrc->m_len); ++ ++ if (type != ICMP_UNREACH && type != ICMP_TIMXCEED) ++ goto end_error; ++ ++ /* check msrc */ ++ if (!msrc) ++ goto end_error; ++ ip = mtod(msrc, struct ip *); ++ if (slirp_debug & DBG_MISC) { ++ char bufa[20], bufb[20]; ++ slirp_pstrcpy(bufa, sizeof(bufa), inet_ntoa(ip->ip_src)); ++ slirp_pstrcpy(bufb, sizeof(bufb), inet_ntoa(ip->ip_dst)); ++ DEBUG_MISC(" %.16s to %.16s", bufa, bufb); ++ } ++ if (ip->ip_off & IP_OFFMASK) ++ goto end_error; /* Only reply to fragment 0 */ ++ ++ /* Do not reply to source-only IPs */ ++ if ((ip->ip_src.s_addr & htonl(~(0xf << 28))) == 0) { ++ goto end_error; ++ } ++ ++ shlen = ip->ip_hl << 2; ++ s_ip_len = ip->ip_len; ++ if (ip->ip_p == IPPROTO_ICMP) { ++ icp = (struct icmp *)((char *)ip + shlen); ++ /* ++ * Assume any unknown ICMP type is an error. This isn't ++ * specified by the RFC, but think about it.. 
++ */ ++ if (icp->icmp_type > 18 || icmp_flush[icp->icmp_type]) ++ goto end_error; ++ } ++ ++ /* make a copy */ ++ m = m_get(msrc->slirp); ++ if (!m) { ++ goto end_error; ++ } ++ ++ { ++ int new_m_size; ++ new_m_size = ++ sizeof(struct ip) + ICMP_MINLEN + msrc->m_len + ICMP_MAXDATALEN; ++ if (new_m_size > m->m_size) ++ m_inc(m, new_m_size); ++ } ++ memcpy(m->m_data, msrc->m_data, msrc->m_len); ++ m->m_len = msrc->m_len; /* copy msrc to m */ ++ ++ /* make the header of the reply packet */ ++ ip = mtod(m, struct ip *); ++ hlen = sizeof(struct ip); /* no options in reply */ ++ ++ /* fill in icmp */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ ++ icp = mtod(m, struct icmp *); ++ ++ if (minsize) ++ s_ip_len = shlen + ICMP_MINLEN; /* return header+8b only */ ++ else if (s_ip_len > ICMP_MAXDATALEN) /* maximum size */ ++ s_ip_len = ICMP_MAXDATALEN; ++ ++ m->m_len = ICMP_MINLEN + s_ip_len; /* 8 bytes ICMP header */ ++ ++ /* min. size = 8+sizeof(struct ip)+8 */ ++ ++ icp->icmp_type = type; ++ icp->icmp_code = code; ++ icp->icmp_id = 0; ++ icp->icmp_seq = 0; ++ ++ memcpy(&icp->icmp_ip, msrc->m_data, s_ip_len); /* report the ip packet */ ++ HTONS(icp->icmp_ip.ip_len); ++ HTONS(icp->icmp_ip.ip_id); ++ HTONS(icp->icmp_ip.ip_off); ++ ++ if (message && WITH_ICMP_ERROR_MSG) { /* append message to ICMP packet */ ++ int message_len; ++ char *cpnt; ++ message_len = strlen(message); ++ if (message_len > ICMP_MAXDATALEN) ++ message_len = ICMP_MAXDATALEN; ++ cpnt = (char *)m->m_data + m->m_len; ++ memcpy(cpnt, message, message_len); ++ m->m_len += message_len; ++ } ++ ++ icp->icmp_cksum = 0; ++ icp->icmp_cksum = cksum(m, m->m_len); ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ /* fill in ip */ ++ ip->ip_hl = hlen >> 2; ++ ip->ip_len = m->m_len; ++ ++ ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */ ++ ++ ip->ip_ttl = MAXTTL; ++ ip->ip_p = IPPROTO_ICMP; ++ ip->ip_dst = ip->ip_src; /* ip addresses */ ++ ip->ip_src = *src; ++ ++ ip_output((struct socket 
*)NULL, m); ++ ++end_error: ++ return; ++} ++#undef ICMP_MAXDATALEN ++ ++void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message) ++{ ++ icmp_forward_error(msrc, type, code, minsize, message, &msrc->slirp->vhost_addr); ++} ++ ++/* ++ * Reflect the ip packet back to the source ++ */ ++void icmp_reflect(struct mbuf *m) ++{ ++ register struct ip *ip = mtod(m, struct ip *); ++ int hlen = ip->ip_hl << 2; ++ int optlen = hlen - sizeof(struct ip); ++ register struct icmp *icp; ++ ++ /* ++ * Send an icmp packet back to the ip level, ++ * after supplying a checksum. ++ */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ icp = mtod(m, struct icmp *); ++ ++ icp->icmp_type = ICMP_ECHOREPLY; ++ icp->icmp_cksum = 0; ++ icp->icmp_cksum = cksum(m, ip->ip_len - hlen); ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ /* fill in ip */ ++ if (optlen > 0) { ++ /* ++ * Strip out original options by copying rest of first ++ * mbuf's data back, and adjust the IP length. ++ */ ++ memmove((char *)(ip + 1), (char *)ip + hlen, ++ (unsigned)(m->m_len - hlen)); ++ hlen -= optlen; ++ ip->ip_hl = hlen >> 2; ++ ip->ip_len -= optlen; ++ m->m_len -= optlen; ++ } ++ ++ ip->ip_ttl = MAXTTL; ++ { /* swap */ ++ struct in_addr icmp_dst; ++ icmp_dst = ip->ip_dst; ++ ip->ip_dst = ip->ip_src; ++ ip->ip_src = icmp_dst; ++ } ++ ++ ip_output((struct socket *)NULL, m); ++} ++ ++void icmp_receive(struct socket *so) ++{ ++ struct mbuf *m = so->so_m; ++ struct ip *ip = mtod(m, struct ip *); ++ int hlen = ip->ip_hl << 2; ++ uint8_t error_code; ++ struct icmp *icp; ++ int id, len; ++ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ icp = mtod(m, struct icmp *); ++ ++ id = icp->icmp_id; ++ len = recv(so->s, icp, M_ROOM(m), 0); ++ /* ++ * The behavior of reading SOCK_DGRAM+IPPROTO_ICMP sockets is inconsistent ++ * between host OSes. On Linux, only the ICMP header and payload is ++ * included. 
On macOS/Darwin, the socket acts like a raw socket and ++ * includes the IP header as well. On other BSDs, SOCK_DGRAM+IPPROTO_ICMP ++ * sockets aren't supported at all, so we treat them like raw sockets. It ++ * isn't possible to detect this difference at runtime, so we must use an ++ * #ifdef to determine if we need to remove the IP header. ++ */ ++#ifdef CONFIG_BSD ++ if (len >= sizeof(struct ip)) { ++ struct ip *inner_ip = mtod(m, struct ip *); ++ int inner_hlen = inner_ip->ip_hl << 2; ++ if (inner_hlen > len) { ++ len = -1; ++ errno = -EINVAL; ++ } else { ++ len -= inner_hlen; ++ memmove(icp, (unsigned char *)icp + inner_hlen, len); ++ } ++ } else { ++ len = -1; ++ errno = -EINVAL; ++ } ++#endif ++ icp->icmp_id = id; ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ if (len == -1 || len == 0) { ++ if (errno == ENETUNREACH) { ++ error_code = ICMP_UNREACH_NET; ++ } else { ++ error_code = ICMP_UNREACH_HOST; ++ } ++ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(so->so_m, ICMP_UNREACH, error_code, 0, strerror(errno)); ++ } else { ++ icmp_reflect(so->so_m); ++ so->so_m = NULL; /* Don't m_free() it again! */ ++ } ++ icmp_detach(so); ++} +diff --git a/slirp/src/ip_icmp.h b/slirp/src/ip_icmp.h +new file mode 100644 +index 0000000000..569a083061 +--- /dev/null ++++ b/slirp/src/ip_icmp.h +@@ -0,0 +1,168 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. 
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93 ++ * ip_icmp.h,v 1.4 1995/05/30 08:09:43 rgrimes Exp ++ */ ++ ++#ifndef NETINET_IP_ICMP_H ++#define NETINET_IP_ICMP_H ++ ++/* ++ * Internet Control Message Protocol Definitions. ++ * Per RFC 792, September 1981. ++ */ ++ ++typedef uint32_t n_time; ++ ++/* ++ * Structure of an icmp header.
++ */ ++struct icmp { ++ uint8_t icmp_type; /* type of message, see below */ ++ uint8_t icmp_code; /* type sub code */ ++ uint16_t icmp_cksum; /* ones complement cksum of struct */ ++ union { ++ uint8_t ih_pptr; /* ICMP_PARAMPROB */ ++ struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ ++ struct ih_idseq { ++ uint16_t icd_id; ++ uint16_t icd_seq; ++ } ih_idseq; ++ int ih_void; ++ ++ /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */ ++ struct ih_pmtu { ++ uint16_t ipm_void; ++ uint16_t ipm_nextmtu; ++ } ih_pmtu; ++ } icmp_hun; ++#define icmp_pptr icmp_hun.ih_pptr ++#define icmp_gwaddr icmp_hun.ih_gwaddr ++#define icmp_id icmp_hun.ih_idseq.icd_id ++#define icmp_seq icmp_hun.ih_idseq.icd_seq ++#define icmp_void icmp_hun.ih_void ++#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void ++#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu ++ union { ++ struct id_ts { ++ n_time its_otime; ++ n_time its_rtime; ++ n_time its_ttime; ++ } id_ts; ++ struct id_ip { ++ struct ip idi_ip; ++ /* options and then 64 bits of data */ ++ } id_ip; ++ uint32_t id_mask; ++ char id_data[1]; ++ } icmp_dun; ++#define icmp_otime icmp_dun.id_ts.its_otime ++#define icmp_rtime icmp_dun.id_ts.its_rtime ++#define icmp_ttime icmp_dun.id_ts.its_ttime ++#define icmp_ip icmp_dun.id_ip.idi_ip ++#define icmp_mask icmp_dun.id_mask ++#define icmp_data icmp_dun.id_data ++}; ++ ++/* ++ * Lower bounds on packet lengths for various types. ++ * For the error advice packets must first ensure that the ++ * packet is large enough to contain the returned ip header. ++ * Only then can we do the check to see if 64 bits of packet ++ * data have been returned, since we need to check the returned ++ * ip header length. 
++ */ ++#define ICMP_MINLEN 8 /* abs minimum */ ++#define ICMP_TSLEN (8 + 3 * sizeof(n_time)) /* timestamp */ ++#define ICMP_MASKLEN 12 /* address mask */ ++#define ICMP_ADVLENMIN (8 + sizeof(struct ip) + 8) /* min */ ++#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8) ++/* N.B.: must separately check that ip_hl >= 5 */ ++ ++/* ++ * Definition of type and code field values. ++ */ ++#define ICMP_ECHOREPLY 0 /* echo reply */ ++#define ICMP_UNREACH 3 /* dest unreachable, codes: */ ++#define ICMP_UNREACH_NET 0 /* bad net */ ++#define ICMP_UNREACH_HOST 1 /* bad host */ ++#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */ ++#define ICMP_UNREACH_PORT 3 /* bad port */ ++#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ ++#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */ ++#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ ++#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ ++#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */ ++#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */ ++#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ ++#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */ ++#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ ++#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */ ++#define ICMP_REDIRECT 5 /* shorter route, codes: */ ++#define ICMP_REDIRECT_NET 0 /* for network */ ++#define ICMP_REDIRECT_HOST 1 /* for host */ ++#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */ ++#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ ++#define ICMP_ECHO 8 /* echo service */ ++#define ICMP_ROUTERADVERT 9 /* router advertisement */ ++#define ICMP_ROUTERSOLICIT 10 /* router solicitation */ ++#define ICMP_TIMXCEED 11 /* time exceeded, code: */ ++#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ ++#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */ ++#define ICMP_PARAMPROB 12 /* ip header bad */ ++#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. 
absent */ ++#define ICMP_TSTAMP 13 /* timestamp request */ ++#define ICMP_TSTAMPREPLY 14 /* timestamp reply */ ++#define ICMP_IREQ 15 /* information request */ ++#define ICMP_IREQREPLY 16 /* information reply */ ++#define ICMP_MASKREQ 17 /* address mask request */ ++#define ICMP_MASKREPLY 18 /* address mask reply */ ++ ++#define ICMP_MAXTYPE 18 ++ ++#define ICMP_INFOTYPE(type) \ ++ ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ ++ (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \ ++ (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \ ++ (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ ++ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) ++ ++void icmp_init(Slirp *slirp); ++void icmp_cleanup(Slirp *slirp); ++void icmp_input(struct mbuf *, int); ++void icmp_forward_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message, struct in_addr *src); ++void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message); ++void icmp_reflect(struct mbuf *); ++void icmp_receive(struct socket *so); ++void icmp_detach(struct socket *so); ++ ++#endif +diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c +new file mode 100644 +index 0000000000..a29c324cce +--- /dev/null ++++ b/slirp/src/ip_input.c +@@ -0,0 +1,463 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. 
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 ++ * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP are ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp); ++static void ip_freef(Slirp *slirp, struct ipq *fp); ++static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev); ++static void ip_deq(register struct ipasfrag *p); ++ ++/* ++ * IP initialization: fill in IP protocol switch table. ++ * All protocols not implemented in kernel go to raw IP protocol handler. 
++ */
++void ip_init(Slirp *slirp)
++{ /* empty, self-linked reassembly queue first; then the UDP, TCP and ICMP init routines */
++    slirp->ipq.ip_link.next = slirp->ipq.ip_link.prev = &slirp->ipq.ip_link;
++    udp_init(slirp);
++    tcp_init(slirp);
++    icmp_init(slirp);
++}
++/* Counterpart of ip_init(): run the UDP, TCP and ICMP cleanup routines. */
++void ip_cleanup(Slirp *slirp)
++{
++    udp_cleanup(slirp);
++    tcp_cleanup(slirp);
++    icmp_cleanup(slirp);
++}
++
++/*
++ * IP input routine.  Validates the IPv4 header of the packet in m
++ * (length, version, header length, header checksum), converts ip_len,
++ * ip_id and ip_off to host byte order, reassembles fragments through
++ * ip_reass() and passes the datagram to tcp_input(), udp_input() or
++ * icmp_input().  IP options are not interpreted here.
++ *
++ * m is given away on every path of this function: it is passed to a
++ * protocol input routine or to ip_reass(), or it is released with m_free().
++ * On return from here ip_len no longer includes the header length.
++ */
++void ip_input(struct mbuf *m)
++{
++    Slirp *slirp = m->slirp;
++    M_DUP_DEBUG(slirp, m, 0, TCPIPHDR_DELTA);
++
++    register struct ip *ip;
++    int hlen;
++
++    if (!slirp->in_enabled) { /* IPv4 is switched off for this instance */
++        goto bad;
++    }
++
++    DEBUG_CALL("ip_input");
++    DEBUG_ARG("m = %p", m);
++    DEBUG_ARG("m_len = %d", m->m_len);
++
++    if (m->m_len < sizeof(struct ip)) { /* not even a minimal header */
++        goto bad;
++    }
++
++    ip = mtod(m, struct ip *);
++
++    if (ip->ip_v != IPVERSION) {
++        goto bad;
++    }
++
++    hlen = ip->ip_hl << 2; /* ip_hl counts 32-bit words */
++    if (hlen < sizeof(struct ip) || hlen > m->m_len) { /* min header length */
++        goto bad; /* or packet too short */
++    }
++
++    /*
++     * Verify the header checksum without storing the result in ip_sum,
++     * so that the ip header stays intact for a possible ICMP reply.
++     */
++    if (cksum(m, hlen)) {
++        goto bad;
++    }
++
++    /*
++     * Convert fields to host representation.
++     */
++    NTOHS(ip->ip_len);
++    if (ip->ip_len < hlen) {
++        goto bad;
++    }
++    NTOHS(ip->ip_id);
++    NTOHS(ip->ip_off);
++
++    /*
++     * Check that the amount of data in the buffers
++     * is at least as much as the IP header would have us expect.
++     * Trim mbufs if longer than we expect.
++     * Drop packet if shorter than we expect.
++     */
++    if (m->m_len < ip->ip_len) {
++        goto bad;
++    }
++
++    /* Should drop packet if mbuf too long? hmmm... */
++    if (m->m_len > ip->ip_len)
++        m_adj(m, ip->ip_len - m->m_len); /* negative count; presumably trims the tail -- confirm in m_adj() */
++
++    /* check ip_ttl for a correct ICMP reply */
++    if (ip->ip_ttl == 0) {
++        icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, "ttl");
++        goto bad;
++    }
++
++    /*
++     * If offset or IP_MF are set, must reassemble.
++     * Otherwise, nothing need be done.
++     * (We could look in the reassembly queue to see
++     * if the packet was previously fragmented,
++     * but it's not worth the time; just let them time out.)
++     *
++     * XXX This should fail, don't fragment yet
++     */
++    if (ip->ip_off & ~IP_DF) {
++        register struct ipq *fp;
++        struct qlink *l;
++        /*
++         * Look for queue of fragments
++         * of this datagram.
++         * A queue matches on id, source, destination and protocol.
++         */
++        for (l = slirp->ipq.ip_link.next; l != &slirp->ipq.ip_link;
++             l = l->next) {
++            fp = container_of(l, struct ipq, ip_link);
++            if (ip->ip_id == fp->ipq_id &&
++                ip->ip_src.s_addr == fp->ipq_src.s_addr &&
++                ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
++                ip->ip_p == fp->ipq_p)
++                goto found;
++        }
++        fp = NULL; /* no queue yet for this datagram */
++    found:
++
++        /*
++         * Adjust ip_len to not reflect header,
++         * record "more fragments expected" in bit 0 of ip_tos
++         * (the code below and ip_reass() test that bit),
++         * convert offset of this to bytes.
++         */
++        ip->ip_len -= hlen;
++        if (ip->ip_off & IP_MF)
++            ip->ip_tos |= 1;
++        else
++            ip->ip_tos &= ~1;
++
++        ip->ip_off <<= 3;
++
++        /*
++         * If datagram marked as having more fragments
++         * or if this is not the first fragment,
++         * attempt reassembly; if it succeeds, proceed.
++         * A NULL result means ip_reass() kept or freed the fragment,
++         * so there is nothing left to deliver or release here.
++         */
++        if (ip->ip_tos & 1 || ip->ip_off) {
++            ip = ip_reass(slirp, ip, fp);
++            if (ip == NULL)
++                return;
++            m = dtom(slirp, ip);
++        } else if (fp)
++            ip_freef(slirp, fp);
++
++    } else
++        ip->ip_len -= hlen;
++
++    /*
++     * Switch out to protocol's input routine.
++     * Packets of any other protocol are dropped.
++     */
++    switch (ip->ip_p) {
++    case IPPROTO_TCP:
++        tcp_input(m, hlen, (struct socket *)NULL, AF_INET);
++        break;
++    case IPPROTO_UDP:
++        udp_input(m, hlen);
++        break;
++    case IPPROTO_ICMP:
++        icmp_input(m, hlen);
++        break;
++    default:
++        m_free(m);
++    }
++    return;
++bad:
++    m_free(m);
++}
++
++#define iptofrag(P) ((struct ipasfrag *)(((char *)(P)) - sizeof(struct qlink)))
++#define fragtoip(P) ((struct ip *)(((char *)(P)) + sizeof(struct qlink)))
++/*
++ * Take incoming datagram fragment and try to
++ * reassemble it into whole datagram.
If a chain for
++ * reassembly of this datagram already exists, then it
++ * is given as fp; otherwise have to make a chain.
++ */
++static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp)
++{
++    register struct mbuf *m = dtom(slirp, ip);
++    register struct ipasfrag *q;
++    int hlen = ip->ip_hl << 2;
++    int i, next;
++    /*
++     * Returns the header of the reassembled datagram once all fragments
++     * are present; returns NULL when this fragment was queued or dropped.
++     */
++    DEBUG_CALL("ip_reass");
++    DEBUG_ARG("ip = %p", ip);
++    DEBUG_ARG("fp = %p", fp);
++    DEBUG_ARG("m = %p", m);
++
++    /*
++     * Presence of header sizes in mbufs
++     * would confuse code below.
++     * Fragment m_data is concatenated.
++     */
++    m->m_data += hlen;
++    m->m_len -= hlen;
++
++    /*
++     * If first fragment to arrive, create a reassembly queue.
++     * The queue header lives in the data area of its own mbuf t.
++     */
++    if (fp == NULL) {
++        struct mbuf *t = m_get(slirp);
++
++        if (t == NULL) {
++            goto dropfrag;
++        }
++        fp = mtod(t, struct ipq *);
++        insque(&fp->ip_link, &slirp->ipq.ip_link);
++        fp->ipq_ttl = IPFRAGTTL;
++        fp->ipq_p = ip->ip_p;
++        fp->ipq_id = ip->ip_id;
++        fp->frag_link.next = fp->frag_link.prev = &fp->frag_link;
++        fp->ipq_src = ip->ip_src;
++        fp->ipq_dst = ip->ip_dst;
++        q = (struct ipasfrag *)fp; /* NOTE(review): relies on the ipq/ipasfrag layouts declared elsewhere -- confirm */
++        goto insert;
++    }
++
++    /*
++     * Find a segment which begins after this one does.
++     */
++    for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link;
++         q = q->ipf_next)
++        if (q->ipf_off > ip->ip_off)
++            break;
++
++    /*
++     * If there is a preceding segment, it may provide some of
++     * our data already.  If so, drop the data from the incoming
++     * segment.  If it provides all of our data, drop us.
++     * i is the number of bytes by which the preceding segment overlaps us.
++     */
++    if (q->ipf_prev != &fp->frag_link) {
++        struct ipasfrag *pq = q->ipf_prev;
++        i = pq->ipf_off + pq->ipf_len - ip->ip_off;
++        if (i > 0) {
++            if (i >= ip->ip_len)
++                goto dropfrag;
++            m_adj(dtom(slirp, ip), i);
++            ip->ip_off += i;
++            ip->ip_len -= i;
++        }
++    }
++
++    /*
++     * While we overlap succeeding segments trim them or,
++     * if they are completely covered, dequeue them.
++     */
++    while (q != (struct ipasfrag *)&fp->frag_link &&
++           ip->ip_off + ip->ip_len > q->ipf_off) {
++        struct ipasfrag *prev;
++        i = (ip->ip_off + ip->ip_len) - q->ipf_off;
++        if (i < q->ipf_len) { /* partial overlap: trim the front of q, done */
++            q->ipf_len -= i;
++            q->ipf_off += i;
++            m_adj(dtom(slirp, q), i);
++            break;
++        }
++        prev = q;
++        q = q->ipf_next; /* advance before prev is unlinked and freed */
++        ip_deq(prev);
++        m_free(dtom(slirp, prev));
++    }
++
++insert:
++    /*
++     * Stick new segment in its place;
++     * check for complete reassembly.
++     * The datagram is complete when the queued fragments cover the offsets
++     * from 0 without a gap and the last one does not have the
++     * "more fragments" bit (bit 0 of ipf_tos) set.
++     */
++    ip_enq(iptofrag(ip), q->ipf_prev);
++    next = 0;
++    for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link;
++         q = q->ipf_next) {
++        if (q->ipf_off != next)
++            return NULL;
++        next += q->ipf_len;
++    }
++    if (((struct ipasfrag *)(q->ipf_prev))->ipf_tos & 1)
++        return NULL;
++
++    /*
++     * Reassembly is complete; concatenate fragments.
++     * delta records where the first fragment header sits relative to the
++     * start of its mbuf's current buffer, before m_cat() is called.
++     */
++    q = fp->frag_link.next;
++    m = dtom(slirp, q);
++    int delta = (char *)q - (m->m_flags & M_EXT ? m->m_ext : m->m_dat);
++
++    q = (struct ipasfrag *)q->ipf_next;
++    while (q != (struct ipasfrag *)&fp->frag_link) {
++        struct mbuf *t = dtom(slirp, q);
++        q = (struct ipasfrag *)q->ipf_next; /* read the link before t is handed to m_cat() */
++        m_cat(m, t);
++    }
++
++    /*
++     * Create header for new ip packet by
++     * modifying header of first packet;
++     * dequeue and discard fragment reassembly header.
++     * Make header visible.
++     */
++    q = fp->frag_link.next;
++
++    /*
++     * If the fragments concatenated to an mbuf that's bigger than the total
++     * size of the fragment and the mbuf was not already using an m_ext buffer,
++     * then an m_ext buffer was allocated. But fp->ipq_next points to the old
++     * buffer (in the mbuf), so we must point ip into the new buffer.
++     */
++    if (m->m_flags & M_EXT) {
++        q = (struct ipasfrag *)(m->m_ext + delta);
++    }
++
++    ip = fragtoip(q);
++    ip->ip_len = next; /* total payload length of the reassembled datagram */
++    ip->ip_tos &= ~1; /* clear the internal "more fragments" marker */
++    ip->ip_src = fp->ipq_src;
++    ip->ip_dst = fp->ipq_dst;
++    remque(&fp->ip_link);
++    m_free(dtom(slirp, fp));
++    m->m_len += (ip->ip_hl << 2);
++    m->m_data -= (ip->ip_hl << 2);
++
++    return ip;
++
++dropfrag:
++    m_free(m);
++    return NULL;
++}
++
++/*
++ * Free a fragment reassembly header and all
++ * associated datagrams.
++ * The queue is also removed from the list of reassembly queues.
++ */
++static void ip_freef(Slirp *slirp, struct ipq *fp)
++{
++    register struct ipasfrag *q, *p;
++
++    for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link;
++         q = p) {
++        p = q->ipf_next; /* saved before q is unlinked and freed */
++        ip_deq(q);
++        m_free(dtom(slirp, q));
++    }
++    remque(&fp->ip_link);
++    m_free(dtom(slirp, fp));
++}
++
++/*
++ * Put an ip fragment on a reassembly chain.
++ * Like insque, but pointers in middle of structure.
++ * p is linked in directly after prev.
++ */
++static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev)
++{
++    DEBUG_CALL("ip_enq");
++    DEBUG_ARG("prev = %p", prev);
++    p->ipf_prev = prev;
++    p->ipf_next = prev->ipf_next;
++    ((struct ipasfrag *)(prev->ipf_next))->ipf_prev = p;
++    prev->ipf_next = p;
++}
++
++/*
++ * To ip_enq as remque is to insque.
++ * Unlinks p from its chain; p's own link fields are left untouched.
++ */
++static void ip_deq(register struct ipasfrag *p)
++{
++    ((struct ipasfrag *)(p->ipf_prev))->ipf_next = p->ipf_next;
++    ((struct ipasfrag *)(p->ipf_next))->ipf_prev = p->ipf_prev;
++}
++
++/*
++ * IP timer processing;
++ * if a timer expires on a reassembly
++ * queue, discard it.
++ * Each call decrements ipq_ttl of every reassembly queue once.
++ */
++void ip_slowtimo(Slirp *slirp)
++{
++    struct qlink *l;
++
++    DEBUG_CALL("ip_slowtimo");
++
++    l = slirp->ipq.ip_link.next;
++
++    if (l == NULL) /* queue list not linked yet; nothing to age */
++        return;
++
++    while (l != &slirp->ipq.ip_link) {
++        struct ipq *fp = container_of(l, struct ipq, ip_link);
++        l = l->next; /* step first: ip_freef() unlinks and frees fp */
++        if (--fp->ipq_ttl == 0) {
++            ip_freef(slirp, fp);
++        }
++    }
++}
++
++/*
++ * Strip out IP options: the data that follows the options is moved up so
++ * that it directly follows the fixed-size struct ip, m_len is reduced by
++ * the length of the options and ip_hl is reset to the option-less header
++ * length.  Nothing is returned, and ip_len is not adjusted here.
++ * (XXX) mopt is currently ignored: the options are discarded, not moved
++ * into it.
++ */
++void ip_stripoptions(register struct mbuf *m, struct mbuf *mopt)
++{
++    register int i;
++    struct ip *ip = mtod(m, struct ip *);
++    register char *opts;
++    int olen;
++
++    olen = (ip->ip_hl << 2) - sizeof(struct ip); /* length of the options */
++    opts = (char *)(ip + 1); /* options start right after the fixed header */
++    i = m->m_len - (sizeof(struct ip) + olen); /* bytes that follow the options */
++    memmove(opts, opts + olen, (unsigned)i);
++    m->m_len -= olen;
++
++    ip->ip_hl = sizeof(struct ip) >> 2;
++}
+diff --git a/slirp/src/ip_output.c b/slirp/src/ip_output.c
+new file mode 100644
+index 0000000000..4f62605915
+--- /dev/null
++++ b/slirp/src/ip_output.c
+@@ -0,0 +1,171 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 1982, 1986, 1988, 1990, 1993
++ * The Regents of the University of California. All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * 2. Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in the
++ * documentation and/or other materials provided with the distribution.
++ * 3. Neither the name of the University nor the names of its contributors
++ * may be used to endorse or promote products derived from this software
++ * without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
++ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 ++ * ip_output.c,v 1.9 1994/11/16 10:17:10 jkh Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP are ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++/* Number of packets queued before we start sending ++ * (to prevent allocing too many mbufs) */ ++#define IF_THRESH 10 ++ ++/* ++ * IP output. The packet in mbuf chain m contains a skeletal IP ++ * header (with len, off, ttl, proto, tos, src, dst). ++ * The mbuf chain containing the packet will be freed. ++ * The mbuf opt, if present, will not be freed. ++ */ ++int ip_output(struct socket *so, struct mbuf *m0) ++{ ++ Slirp *slirp = m0->slirp; ++ M_DUP_DEBUG(slirp, m0, 0, 0); ++ ++ register struct ip *ip; ++ register struct mbuf *m = m0; ++ register int hlen = sizeof(struct ip); ++ int len, off, error = 0; ++ ++ DEBUG_CALL("ip_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m0 = %p", m0); ++ ++ ip = mtod(m, struct ip *); ++ /* ++ * Fill in IP header. ++ */ ++ ip->ip_v = IPVERSION; ++ ip->ip_off &= IP_DF; ++ ip->ip_id = htons(slirp->ip_id++); ++ ip->ip_hl = hlen >> 2; ++ ++ /* ++ * If small enough for interface, can just send directly. 
++ */ ++ if ((uint16_t)ip->ip_len <= slirp->if_mtu) { ++ ip->ip_len = htons((uint16_t)ip->ip_len); ++ ip->ip_off = htons((uint16_t)ip->ip_off); ++ ip->ip_sum = 0; ++ ip->ip_sum = cksum(m, hlen); ++ ++ if_output(so, m); ++ goto done; ++ } ++ ++ /* ++ * Too large for interface; fragment if possible. ++ * Must be able to put at least 8 bytes per fragment. ++ */ ++ if (ip->ip_off & IP_DF) { ++ error = -1; ++ goto bad; ++ } ++ ++ len = (slirp->if_mtu - hlen) & ~7; /* ip databytes per packet */ ++ if (len < 8) { ++ error = -1; ++ goto bad; ++ } ++ ++ { ++ int mhlen, firstlen = len; ++ struct mbuf **mnext = &m->m_nextpkt; ++ ++ /* ++ * Loop through length of segment after first fragment, ++ * make new header and copy data of each part and link onto chain. ++ */ ++ m0 = m; ++ mhlen = sizeof(struct ip); ++ for (off = hlen + len; off < (uint16_t)ip->ip_len; off += len) { ++ register struct ip *mhip; ++ m = m_get(slirp); ++ if (m == NULL) { ++ error = -1; ++ goto sendorfree; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ mhip = mtod(m, struct ip *); ++ *mhip = *ip; ++ ++ m->m_len = mhlen; ++ mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); ++ if (ip->ip_off & IP_MF) ++ mhip->ip_off |= IP_MF; ++ if (off + len >= (uint16_t)ip->ip_len) ++ len = (uint16_t)ip->ip_len - off; ++ else ++ mhip->ip_off |= IP_MF; ++ mhip->ip_len = htons((uint16_t)(len + mhlen)); ++ ++ if (m_copy(m, m0, off, len) < 0) { ++ error = -1; ++ goto sendorfree; ++ } ++ ++ mhip->ip_off = htons((uint16_t)mhip->ip_off); ++ mhip->ip_sum = 0; ++ mhip->ip_sum = cksum(m, mhlen); ++ *mnext = m; ++ mnext = &m->m_nextpkt; ++ } ++ /* ++ * Update first fragment by trimming what's been copied out ++ * and updating header, then send each fragment (in order). 
++ */ ++ m = m0; ++ m_adj(m, hlen + firstlen - (uint16_t)ip->ip_len); ++ ip->ip_len = htons((uint16_t)m->m_len); ++ ip->ip_off = htons((uint16_t)(ip->ip_off | IP_MF)); ++ ip->ip_sum = 0; ++ ip->ip_sum = cksum(m, hlen); ++ sendorfree: ++ for (m = m0; m; m = m0) { ++ m0 = m->m_nextpkt; ++ m->m_nextpkt = NULL; ++ if (error == 0) ++ if_output(so, m); ++ else ++ m_free(m); ++ } ++ } ++ ++done: ++ return (error); ++ ++bad: ++ m_free(m0); ++ goto done; ++} +diff --git a/slirp/src/libslirp-version.h.in b/slirp/src/libslirp-version.h.in +new file mode 100644 +index 0000000000..faa6c85952 +--- /dev/null ++++ b/slirp/src/libslirp-version.h.in +@@ -0,0 +1,24 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef LIBSLIRP_VERSION_H_ ++#define LIBSLIRP_VERSION_H_ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define SLIRP_MAJOR_VERSION @SLIRP_MAJOR_VERSION@ ++#define SLIRP_MINOR_VERSION @SLIRP_MINOR_VERSION@ ++#define SLIRP_MICRO_VERSION @SLIRP_MICRO_VERSION@ ++#define SLIRP_VERSION_STRING @SLIRP_VERSION_STRING@ ++ ++#define SLIRP_CHECK_VERSION(major,minor,micro) \ ++ (SLIRP_MAJOR_VERSION > (major) || \ ++ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION > (minor)) || \ ++ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION == (minor) && \ ++ SLIRP_MICRO_VERSION >= (micro))) ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* LIBSLIRP_VERSION_H_ */ +diff --git a/slirp/src/libslirp.h b/slirp/src/libslirp.h +new file mode 100644 +index 0000000000..5760d53cea +--- /dev/null ++++ b/slirp/src/libslirp.h +@@ -0,0 +1,236 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef LIBSLIRP_H ++#define LIBSLIRP_H ++ ++#include ++#include ++#include ++ ++#ifdef _WIN32 ++#include ++#include ++#include ++#else ++#include ++#include ++#endif ++ ++#include "libslirp-version.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* Opaque structure containing the slirp state */ ++typedef struct Slirp Slirp; ++ ++/* Flags passed to SlirpAddPollCb and to be 
returned by SlirpGetREventsCb. */ ++enum { ++ SLIRP_POLL_IN = 1 << 0, ++ SLIRP_POLL_OUT = 1 << 1, ++ SLIRP_POLL_PRI = 1 << 2, ++ SLIRP_POLL_ERR = 1 << 3, ++ SLIRP_POLL_HUP = 1 << 4, ++}; ++ ++typedef ssize_t (*SlirpReadCb)(void *buf, size_t len, void *opaque); ++typedef ssize_t (*SlirpWriteCb)(const void *buf, size_t len, void *opaque); ++typedef void (*SlirpTimerCb)(void *opaque); ++typedef int (*SlirpAddPollCb)(int fd, int events, void *opaque); ++typedef int (*SlirpGetREventsCb)(int idx, void *opaque); ++ ++/* ++ * Callbacks from slirp, to be set by the application. ++ * ++ * The opaque parameter is set to the opaque pointer given in the slirp_new / ++ * slirp_init call. ++ */ ++typedef struct SlirpCb { ++ /* ++ * Send an ethernet frame to the guest network. The opaque parameter is the ++ * one given to slirp_init(). If the guest is not ready to receive a frame, ++ * the function can just drop the data. TCP will then handle retransmissions ++ * at a lower pace. ++ * <0 reports an IO error. ++ */ ++ SlirpWriteCb send_packet; ++ /* Print a message for an error due to guest misbehavior. 
*/ ++ void (*guest_error)(const char *msg, void *opaque); ++ /* Return the virtual clock value in nanoseconds */ ++ int64_t (*clock_get_ns)(void *opaque); ++ /* Create a new timer with the given callback and opaque data */ ++ void *(*timer_new)(SlirpTimerCb cb, void *cb_opaque, void *opaque); ++ /* Remove and free a timer */ ++ void (*timer_free)(void *timer, void *opaque); ++ /* Modify a timer to expire at @expire_time (ms) */ ++ void (*timer_mod)(void *timer, int64_t expire_time, void *opaque); ++ /* Register a fd for future polling */ ++ void (*register_poll_fd)(int fd, void *opaque); ++ /* Unregister a fd */ ++ void (*unregister_poll_fd)(int fd, void *opaque); ++ /* Kick the io-thread, to signal that new events may be processed */ ++ void (*notify)(void *opaque); ++} SlirpCb; ++ ++#define SLIRP_CONFIG_VERSION_MIN 1 ++#define SLIRP_CONFIG_VERSION_MAX 3 ++ ++typedef struct SlirpConfig { ++ /* Version must be provided */ ++ uint32_t version; ++ /* ++ * Fields introduced in SlirpConfig version 1 begin ++ */ ++ int restricted; ++ bool in_enabled; ++ struct in_addr vnetwork; ++ struct in_addr vnetmask; ++ struct in_addr vhost; ++ bool in6_enabled; ++ struct in6_addr vprefix_addr6; ++ uint8_t vprefix_len; ++ struct in6_addr vhost6; ++ const char *vhostname; ++ const char *tftp_server_name; ++ const char *tftp_path; ++ const char *bootfile; ++ struct in_addr vdhcp_start; ++ struct in_addr vnameserver; ++ struct in6_addr vnameserver6; ++ const char **vdnssearch; ++ const char *vdomainname; ++ /* Default: IF_MTU_DEFAULT */ ++ size_t if_mtu; ++ /* Default: IF_MRU_DEFAULT */ ++ size_t if_mru; ++ /* Prohibit connecting to 127.0.0.1:* */ ++ bool disable_host_loopback; ++ /* ++ * Enable emulation code (*warning*: this code isn't safe, it is not ++ * recommended to enable it) ++ */ ++ bool enable_emu; ++ /* ++ * Fields introduced in SlirpConfig version 2 begin ++ */ ++ struct sockaddr_in *outbound_addr; ++ struct sockaddr_in6 *outbound_addr6; ++ /* ++ * Fields introduced in 
SlirpConfig version 3 begin ++ */ ++ bool disable_dns; /* slirp will not redirect/serve any DNS packet */ ++} SlirpConfig; ++ ++/* Create a new instance of a slirp stack */ ++Slirp *slirp_new(const SlirpConfig *cfg, const SlirpCb *callbacks, ++ void *opaque); ++/* slirp_init is deprecated in favor of slirp_new */ ++Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, ++ struct in_addr vnetmask, struct in_addr vhost, ++ bool in6_enabled, struct in6_addr vprefix_addr6, ++ uint8_t vprefix_len, struct in6_addr vhost6, ++ const char *vhostname, const char *tftp_server_name, ++ const char *tftp_path, const char *bootfile, ++ struct in_addr vdhcp_start, struct in_addr vnameserver, ++ struct in6_addr vnameserver6, const char **vdnssearch, ++ const char *vdomainname, const SlirpCb *callbacks, ++ void *opaque); ++/* Shut down an instance of a slirp stack */ ++void slirp_cleanup(Slirp *slirp); ++ ++/* This is called by the application when it is about to sleep through poll(). ++ * *timeout is set to the amount of virtual time (in ms) that the application intends to ++ * wait (UINT32_MAX if infinite). slirp_pollfds_fill updates it according to ++ * e.g. TCP timers, so the application knows it should sleep a smaller amount of ++ * time. slirp_pollfds_fill calls add_poll for each file descriptor ++ * that should be monitored along the sleep. The opaque pointer is passed as ++ * such to add_poll, and add_poll returns an index. */ ++void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, ++ SlirpAddPollCb add_poll, void *opaque); ++ ++/* This is called by the application after sleeping, to report which file ++ * descriptors are available. slirp_pollfds_poll calls get_revents on each file ++ * descriptor, giving it the index that add_poll returned during the ++ * slirp_pollfds_fill call, to know whether the descriptor is available for ++ * read/write/etc. (SLIRP_POLL_*) ++ * select_error should be passed 1 if poll() returned an error. 
*/ ++void slirp_pollfds_poll(Slirp *slirp, int select_error, ++ SlirpGetREventsCb get_revents, void *opaque); ++ ++/* This is called by the application when the guest emits a packet on the ++ * guest network, to be interpreted by slirp. */ ++void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); ++ ++/* These set up / remove port forwarding between a host port in the real world ++ * and the guest network. */ ++int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port, struct in_addr guest_addr, int guest_port); ++int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port); ++ ++#define SLIRP_HOSTFWD_UDP 1 ++#define SLIRP_HOSTFWD_V6ONLY 2 ++int slirp_add_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *gaddr, socklen_t gaddrlen, ++ int flags); ++int slirp_remove_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ int flags); ++ ++/* Set up port forwarding between a port in the guest network and a ++ * command running on the host */ ++int slirp_add_exec(Slirp *slirp, const char *cmdline, ++ struct in_addr *guest_addr, int guest_port); ++/* Set up port forwarding between a port in the guest network and a ++ * Unix port on the host */ ++int slirp_add_unix(Slirp *slirp, const char *unixsock, ++ struct in_addr *guest_addr, int guest_port); ++/* Set up port forwarding between a port in the guest network and a ++ * callback that will receive the data coming from the port */ ++int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, ++ struct in_addr *guest_addr, int guest_port); ++ ++/* TODO: rather identify a guestfwd through an opaque pointer instead of through ++ * the guest_addr */ ++ ++/* This is called by the application for a guestfwd, to determine how much data ++ * can be received by the forwarded port through a call to slirp_socket_recv. 
*/ ++size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++/* This is called by the application for a guestfwd, to provide the data to be ++ * sent on the forwarded port */ ++void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, ++ const uint8_t *buf, int size); ++ ++/* Remove entries added by slirp_add_exec, slirp_add_unix or slirp_add_guestfwd */ ++int slirp_remove_guestfwd(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++ ++/* Return a human-readable state of the slirp stack */ ++char *slirp_connection_info(Slirp *slirp); ++ ++/* Return a human-readable state of the NDP/ARP tables */ ++char *slirp_neighbor_info(Slirp *slirp); ++ ++/* Save the slirp state through the write_cb. The opaque pointer is passed as ++ * such to the write_cb. */ ++void slirp_state_save(Slirp *s, SlirpWriteCb write_cb, void *opaque); ++ ++/* Returns the version of the slirp state, to be saved along the state */ ++int slirp_state_version(void); ++ ++/* Load the slirp state through the read_cb. The opaque pointer is passed as ++ * such to the read_cb. The version should be given as it was obtained from ++ * slirp_state_version when slirp_state_save was called. 
*/ ++int slirp_state_load(Slirp *s, int version_id, SlirpReadCb read_cb, ++ void *opaque); ++ ++/* Return the version of the slirp implementation */ ++const char *slirp_version_string(void); ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* LIBSLIRP_H */ +diff --git a/slirp/src/libslirp.map b/slirp/src/libslirp.map +new file mode 100644 +index 0000000000..792b0a94ab +--- /dev/null ++++ b/slirp/src/libslirp.map +@@ -0,0 +1,36 @@ ++SLIRP_4.0 { ++global: ++ slirp_add_exec; ++ slirp_add_guestfwd; ++ slirp_add_hostfwd; ++ slirp_cleanup; ++ slirp_connection_info; ++ slirp_init; ++ slirp_input; ++ slirp_pollfds_fill; ++ slirp_pollfds_poll; ++ slirp_remove_hostfwd; ++ slirp_socket_can_recv; ++ slirp_socket_recv; ++ slirp_state_load; ++ slirp_state_save; ++ slirp_state_version; ++ slirp_version_string; ++local: ++ *; ++}; ++ ++SLIRP_4.1 { ++ slirp_new; ++} SLIRP_4.0; ++ ++SLIRP_4.2 { ++ slirp_add_unix; ++ slirp_remove_guestfwd; ++} SLIRP_4.1; ++ ++SLIRP_4.5 { ++ slirp_add_hostxfwd; ++ slirp_remove_hostxfwd; ++ slirp_neighbor_info; ++} SLIRP_4.2; +diff --git a/slirp/src/main.h b/slirp/src/main.h +new file mode 100644 +index 0000000000..3b3f883703 +--- /dev/null ++++ b/slirp/src/main.h +@@ -0,0 +1,16 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef SLIRP_MAIN_H ++#define SLIRP_MAIN_H ++ ++extern unsigned curtime; ++extern struct in_addr loopback_addr; ++extern unsigned long loopback_mask; ++ ++int if_encap(Slirp *slirp, struct mbuf *ifm); ++ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags); ++ ++#endif +diff --git a/slirp/src/mbuf.c b/slirp/src/mbuf.c +new file mode 100644 +index 0000000000..36864a401f +--- /dev/null ++++ b/slirp/src/mbuf.c +@@ -0,0 +1,281 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski ++ */ ++ ++/* ++ * mbuf's in SLiRP are much simpler than the real mbufs in ++ * FreeBSD. 
They are fixed size, determined by the MTU, ++ * so that one whole packet can fit. Mbuf's cannot be ++ * chained together. If there's more data than the mbuf ++ * could hold, an external g_malloced buffer is pointed to ++ * by m_ext (and the data pointers) and M_EXT is set in ++ * the flags ++ */ ++ ++#include "slirp.h" ++ ++#define MBUF_THRESH 30 ++ ++/* ++ * Find a nice value for msize ++ */ ++#define SLIRP_MSIZE(mtu) \ ++ (offsetof(struct mbuf, m_dat) + IF_MAXLINKHDR + TCPIPHDR_DELTA + (mtu)) ++ ++void m_init(Slirp *slirp) ++{ ++ slirp->m_freelist.qh_link = slirp->m_freelist.qh_rlink = &slirp->m_freelist; ++ slirp->m_usedlist.qh_link = slirp->m_usedlist.qh_rlink = &slirp->m_usedlist; ++} ++ ++static void m_cleanup_list(struct quehead *list_head) ++{ ++ struct mbuf *m, *next; ++ ++ m = (struct mbuf *)list_head->qh_link; ++ while ((struct quehead *)m != list_head) { ++ next = m->m_next; ++ if (m->m_flags & M_EXT) { ++ g_free(m->m_ext); ++ } ++ g_free(m); ++ m = next; ++ } ++ list_head->qh_link = list_head; ++ list_head->qh_rlink = list_head; ++} ++ ++void m_cleanup(Slirp *slirp) ++{ ++ m_cleanup_list(&slirp->m_usedlist); ++ m_cleanup_list(&slirp->m_freelist); ++ m_cleanup_list(&slirp->if_batchq); ++ m_cleanup_list(&slirp->if_fastq); ++} ++ ++/* ++ * Get an mbuf from the free list, if there are none ++ * allocate one ++ * ++ * Because fragmentation can occur if we alloc new mbufs and ++ * free old mbufs, we mark all mbufs above mbuf_thresh as M_DOFREE, ++ * which tells m_free to actually g_free() it ++ */ ++struct mbuf *m_get(Slirp *slirp) ++{ ++ register struct mbuf *m; ++ int flags = 0; ++ ++ DEBUG_CALL("m_get"); ++ ++ if (MBUF_DEBUG || slirp->m_freelist.qh_link == &slirp->m_freelist) { ++ m = g_malloc(SLIRP_MSIZE(slirp->if_mtu)); ++ slirp->mbuf_alloced++; ++ if (MBUF_DEBUG || slirp->mbuf_alloced > MBUF_THRESH) ++ flags = M_DOFREE; ++ m->slirp = slirp; ++ } else { ++ m = (struct mbuf *)slirp->m_freelist.qh_link; ++ remque(m); ++ } ++ ++ /* Insert it in the used 
list */ ++ insque(m, &slirp->m_usedlist); ++ m->m_flags = (flags | M_USEDLIST); ++ ++ /* Initialise it */ ++ m->m_size = SLIRP_MSIZE(slirp->if_mtu) - offsetof(struct mbuf, m_dat); ++ m->m_data = m->m_dat; ++ m->m_len = 0; ++ m->m_nextpkt = NULL; ++ m->m_prevpkt = NULL; ++ m->resolution_requested = false; ++ m->expiration_date = (uint64_t)-1; ++ DEBUG_ARG("m = %p", m); ++ return m; ++} ++ ++void m_free(struct mbuf *m) ++{ ++ DEBUG_CALL("m_free"); ++ DEBUG_ARG("m = %p", m); ++ ++ if (m) { ++ /* Remove from m_usedlist */ ++ if (m->m_flags & M_USEDLIST) ++ remque(m); ++ ++ /* If it's M_EXT, free() it */ ++ if (m->m_flags & M_EXT) { ++ g_free(m->m_ext); ++ m->m_flags &= ~M_EXT; ++ } ++ /* ++ * Either free() it or put it on the free list ++ */ ++ if (m->m_flags & M_DOFREE) { ++ m->slirp->mbuf_alloced--; ++ g_free(m); ++ } else if ((m->m_flags & M_FREELIST) == 0) { ++ insque(m, &m->slirp->m_freelist); ++ m->m_flags = M_FREELIST; /* Clobber other flags */ ++ } ++ } /* if(m) */ ++} ++ ++/* ++ * Copy data from one mbuf to the end of ++ * the other.. if result is too big for one mbuf, allocate ++ * an M_EXT data segment ++ */ ++void m_cat(struct mbuf *m, struct mbuf *n) ++{ ++ /* ++ * If there's no room, realloc ++ */ ++ if (M_FREEROOM(m) < n->m_len) ++ m_inc(m, m->m_len + n->m_len); ++ ++ memcpy(m->m_data + m->m_len, n->m_data, n->m_len); ++ m->m_len += n->m_len; ++ ++ m_free(n); ++} ++ ++ ++/* make m 'size' bytes large from m_data */ ++void m_inc(struct mbuf *m, int size) ++{ ++ int gapsize; ++ ++ /* some compilers throw up on gotos. This one we can fake. 
*/ ++ if (M_ROOM(m) > size) { ++ return; ++ } ++ ++ if (m->m_flags & M_EXT) { ++ gapsize = m->m_data - m->m_ext; ++ m->m_ext = g_realloc(m->m_ext, size + gapsize); ++ } else { ++ gapsize = m->m_data - m->m_dat; ++ m->m_ext = g_malloc(size + gapsize); ++ memcpy(m->m_ext, m->m_dat, m->m_size); ++ m->m_flags |= M_EXT; ++ } ++ ++ m->m_data = m->m_ext + gapsize; ++ m->m_size = size + gapsize; ++} ++ ++ ++void m_adj(struct mbuf *m, int len) ++{ ++ if (m == NULL) ++ return; ++ if (len >= 0) { ++ /* Trim from head */ ++ m->m_data += len; ++ m->m_len -= len; ++ } else { ++ /* Trim from tail */ ++ len = -len; ++ m->m_len -= len; ++ } ++} ++ ++ ++/* ++ * Copy len bytes from m, starting off bytes into n ++ */ ++int m_copy(struct mbuf *n, struct mbuf *m, int off, int len) ++{ ++ if (len > M_FREEROOM(n)) ++ return -1; ++ ++ memcpy((n->m_data + n->m_len), (m->m_data + off), len); ++ n->m_len += len; ++ return 0; ++} ++ ++ ++/* ++ * Given a pointer into an mbuf, return the mbuf ++ * XXX This is a kludge, I should eliminate the need for it ++ * Fortunately, it's not used often ++ */ ++struct mbuf *dtom(Slirp *slirp, void *dat) ++{ ++ struct mbuf *m; ++ ++ DEBUG_CALL("dtom"); ++ DEBUG_ARG("dat = %p", dat); ++ ++ /* bug corrected for M_EXT buffers */ ++ for (m = (struct mbuf *)slirp->m_usedlist.qh_link; ++ (struct quehead *)m != &slirp->m_usedlist; m = m->m_next) { ++ if (m->m_flags & M_EXT) { ++ if ((char *)dat >= m->m_ext && (char *)dat < (m->m_ext + m->m_size)) ++ return m; ++ } else { ++ if ((char *)dat >= m->m_dat && (char *)dat < (m->m_dat + m->m_size)) ++ return m; ++ } ++ } ++ ++ DEBUG_ERROR("dtom failed"); ++ ++ return (struct mbuf *)0; ++} ++ ++/* ++ * Duplicate the mbuf ++ * ++ * copy_header specifies whether the bytes before m_data should also be copied. ++ * header_size specifies how many bytes are to be reserved before m_data. 
++ */ ++struct mbuf *m_dup(Slirp *slirp, struct mbuf *m, ++ bool copy_header, ++ size_t header_size) ++{ ++ struct mbuf *n; ++ int mcopy_result; ++ ++ /* The previous mbuf was supposed to have it already, we can check it along ++ * the way */ ++ assert(M_ROOMBEFORE(m) >= header_size); ++ ++ n = m_get(slirp); ++ m_inc(n, m->m_len + header_size); ++ ++ if (copy_header) { ++ m->m_len += header_size; ++ m->m_data -= header_size; ++ mcopy_result = m_copy(n, m, 0, m->m_len + header_size); ++ n->m_data += header_size; ++ m->m_len -= header_size; ++ m->m_data += header_size; ++ } else { ++ n->m_data += header_size; ++ mcopy_result = m_copy(n, m, 0, m->m_len); ++ } ++ g_assert(mcopy_result == 0); ++ ++ return n; ++} ++ ++void *mtod_check(struct mbuf *m, size_t len) ++{ ++ if (m->m_len >= len) { ++ return m->m_data; ++ } ++ ++ DEBUG_ERROR("mtod failed"); ++ ++ return NULL; ++} ++ ++void *m_end(struct mbuf *m) ++{ ++ return m->m_data + m->m_len; ++} +diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h +new file mode 100644 +index 0000000000..34e697a914 +--- /dev/null ++++ b/slirp/src/mbuf.h +@@ -0,0 +1,192 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)mbuf.h 8.3 (Berkeley) 1/21/94 ++ * mbuf.h,v 1.9 1994/11/14 13:54:20 bde Exp ++ */ ++ ++#ifndef MBUF_H ++#define MBUF_H ++ ++/* ++ * Macros for type conversion ++ * mtod(m,t) - convert mbuf pointer to data pointer of correct type ++ */ ++#define mtod(m, t) ((t)(m)->m_data) ++ ++/* XXX About mbufs for slirp: ++ * Only one mbuf is ever used in a chain, for each "cell" of data. ++ * m_nextpkt points to the next packet, if fragmented. ++ * If the data is too large, the M_EXT is used, and a larger block ++ * is alloced. Therefore, m_free[m] must check for M_EXT and if set ++ * free the m_ext. This is inefficient memory-wise, but who cares. ++ */ ++ ++/* ++ * mbufs allow to have a gap between the start of the allocated buffer (m_ext if ++ * M_EXT is set, m_dat otherwise) and the in-use data: ++ * ++ * |--gapsize----->|---m_len-------> ++ * |----------m_size------------------------------> ++ * |----M_ROOM--------------------> ++ * |-M_FREEROOM--> ++ * ++ * ^ ^ ^ ++ * m_dat/m_ext m_data end of buffer ++ */ ++ ++/* ++ * How much room is in the mbuf, from m_data to the end of the mbuf ++ */ ++#define M_ROOM(m) \ ++ ((m->m_flags & M_EXT) ? 
(((m)->m_ext + (m)->m_size) - (m)->m_data) : \ ++ (((m)->m_dat + (m)->m_size) - (m)->m_data)) ++ ++/* ++ * How much free room there is ++ */ ++#define M_FREEROOM(m) (M_ROOM(m) - (m)->m_len) ++ ++/* ++ * How much free room there is before m_data ++ */ ++#define M_ROOMBEFORE(m) \ ++ (((m)->m_flags & M_EXT) ? (m)->m_data - (m)->m_ext \ ++ : (m)->m_data - (m)->m_dat) ++ ++struct mbuf { ++ /* XXX should union some of these! */ ++ /* header at beginning of each mbuf: */ ++ struct mbuf *m_next; /* Linked list of mbufs */ ++ struct mbuf *m_prev; ++ struct mbuf *m_nextpkt; /* Next packet in queue/record */ ++ struct mbuf *m_prevpkt; /* Flags aren't used in the output queue */ ++ int m_flags; /* Misc flags */ ++ ++ int m_size; /* Size of mbuf, from m_dat or m_ext */ ++ struct socket *m_so; ++ ++ char *m_data; /* Current location of data */ ++ int m_len; /* Amount of data in this mbuf, from m_data */ ++ ++ Slirp *slirp; ++ bool resolution_requested; ++ uint64_t expiration_date; ++ char *m_ext; ++ /* start of dynamic buffer area, must be last element */ ++ char m_dat[]; ++}; ++ ++#define ifq_prev m_prev ++#define ifq_next m_next ++#define ifs_prev m_prevpkt ++#define ifs_next m_nextpkt ++#define ifq_so m_so ++ ++#define M_EXT 0x01 /* m_ext points to more (malloced) data */ ++#define M_FREELIST 0x02 /* mbuf is on free list */ ++#define M_USEDLIST 0x04 /* XXX mbuf is on used list (for dtom()) */ ++#define M_DOFREE \ ++ 0x08 /* when m_free is called on the mbuf, free() \ ++ * it rather than putting it on the free list */ ++ ++void m_init(Slirp *); ++void m_cleanup(Slirp *slirp); ++struct mbuf *m_get(Slirp *); ++void m_free(struct mbuf *); ++void m_cat(register struct mbuf *, register struct mbuf *); ++void m_inc(struct mbuf *, int); ++void m_adj(struct mbuf *, int); ++int m_copy(struct mbuf *, struct mbuf *, int, int); ++struct mbuf *m_dup(Slirp *slirp, struct mbuf *m, bool copy_header, size_t header_size); ++struct mbuf *dtom(Slirp *, void *); ++void *mtod_check(struct mbuf *, 
size_t len); ++void *m_end(struct mbuf *); ++ ++static inline void ifs_init(struct mbuf *ifm) ++{ ++ ifm->ifs_next = ifm->ifs_prev = ifm; ++} ++ ++#ifdef DEBUG ++# define MBUF_DEBUG 1 ++#else ++# ifdef HAVE_VALGRIND ++# include ++# define MBUF_DEBUG RUNNING_ON_VALGRIND ++# else ++# define MBUF_DEBUG 0 ++# endif ++#endif ++ ++/* ++ * When a function is given an mbuf as well as the responsibility to free it, we ++ * want valgrind etc. to properly identify the new responsible for the ++ * free. Achieve this by making a new copy. For instance: ++ * ++ * f0(void) { ++ * struct mbuf *m = m_get(slirp); ++ * [...] ++ * switch (something) { ++ * case 1: ++ * f1(m); ++ * break; ++ * case 2: ++ * f2(m); ++ * break; ++ * [...] ++ * } ++ * } ++ * ++ * f1(struct mbuf *m) { ++ * M_DUP_DEBUG(m->slirp, m); ++ * [...] ++ * m_free(m); // but author of f1 might be forgetting this ++ * } ++ * ++ * f0 transfers the freeing responsibility to f1, f2, etc. Without the ++ * M_DUP_DEBUG call in f1, valgrind would tell us that it is f0 where the buffer ++ * was allocated, but it's difficult to know whether a leak is actually in f0, ++ * or in f1, or in f2, etc. Duplicating the mbuf in M_DUP_DEBUG each time the ++ * responsibility is transferred allows to immediately know where the leak ++ * actually is. ++ */ ++#define M_DUP_DEBUG(slirp, m, copy_header, header_size) do { \ ++ if (MBUF_DEBUG) { \ ++ struct mbuf *__n; \ ++ __n = m_dup((slirp), (m), (copy_header), (header_size)); \ ++ m_free(m); \ ++ (m) = __n; \ ++ } else { \ ++ (void) (slirp); (void) (copy_header); \ ++ g_assert(M_ROOMBEFORE(m) >= (header_size)); \ ++ } \ ++} while(0) ++ ++#endif +diff --git a/slirp/src/misc.c b/slirp/src/misc.c +new file mode 100644 +index 0000000000..48f180be43 +--- /dev/null ++++ b/slirp/src/misc.c +@@ -0,0 +1,440 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++#ifdef G_OS_UNIX ++#include ++#endif ++ ++inline void insque(void *a, void *b) ++{ ++ register struct quehead *element = (struct quehead *)a; ++ register struct quehead *head = (struct quehead *)b; ++ element->qh_link = head->qh_link; ++ head->qh_link = (struct quehead *)element; ++ element->qh_rlink = (struct quehead *)head; ++ ((struct quehead *)(element->qh_link))->qh_rlink = ++ (struct quehead *)element; ++} ++ ++inline void remque(void *a) ++{ ++ register struct quehead *element = (struct quehead *)a; ++ ((struct quehead *)(element->qh_link))->qh_rlink = element->qh_rlink; ++ ((struct quehead *)(element->qh_rlink))->qh_link = element->qh_link; ++ element->qh_rlink = NULL; ++} ++ ++/* TODO: IPv6 */ ++struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, ++ void *opaque, struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = g_new0(struct gfwd_list, 1); ++ ++ f->write_cb = write_cb; ++ f->opaque = opaque; ++ f->ex_fport = port; ++ f->ex_addr = addr; ++ f->ex_next = *ex_ptr; ++ *ex_ptr = f; ++ ++ return f; ++} ++ ++struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, ++ struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); ++ ++ f->ex_exec = g_strdup(cmdline); ++ ++ return f; ++} ++ ++struct gfwd_list *add_unix(struct gfwd_list **ex_ptr, const char *unixsock, ++ struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); ++ ++ f->ex_unix = g_strdup(unixsock); ++ ++ return f; ++} ++ ++int remove_guestfwd(struct gfwd_list **ex_ptr, struct in_addr addr, int port) ++{ ++ for (; *ex_ptr != NULL; ex_ptr = &((*ex_ptr)->ex_next)) { ++ struct gfwd_list *f = *ex_ptr; ++ if (f->ex_addr.s_addr == addr.s_addr && f->ex_fport == port) { ++ *ex_ptr = f->ex_next; ++ g_free(f->ex_exec); ++ g_free(f); ++ return 0; ++ } ++ } ++ return -1; ++} ++ ++static int slirp_socketpair_with_oob(int sv[2]) ++{ ++ 
struct sockaddr_in addr = { ++ .sin_family = AF_INET, ++ .sin_port = 0, ++ .sin_addr.s_addr = INADDR_ANY, ++ }; ++ socklen_t addrlen = sizeof(addr); ++ int ret, s; ++ ++ sv[1] = -1; ++ s = slirp_socket(AF_INET, SOCK_STREAM, 0); ++ if (s < 0 || bind(s, (struct sockaddr *)&addr, addrlen) < 0 || ++ listen(s, 1) < 0 || ++ getsockname(s, (struct sockaddr *)&addr, &addrlen) < 0) { ++ goto err; ++ } ++ ++ sv[1] = slirp_socket(AF_INET, SOCK_STREAM, 0); ++ if (sv[1] < 0) { ++ goto err; ++ } ++ /* ++ * This connect won't block because we've already listen()ed on ++ * the server end (even though we won't accept() the connection ++ * until later on). ++ */ ++ do { ++ ret = connect(sv[1], (struct sockaddr *)&addr, addrlen); ++ } while (ret < 0 && errno == EINTR); ++ if (ret < 0) { ++ goto err; ++ } ++ ++ do { ++ sv[0] = accept(s, (struct sockaddr *)&addr, &addrlen); ++ } while (sv[0] < 0 && errno == EINTR); ++ if (sv[0] < 0) { ++ goto err; ++ } ++ ++ closesocket(s); ++ return 0; ++ ++err: ++ g_critical("slirp_socketpair(): %s", strerror(errno)); ++ if (s >= 0) { ++ closesocket(s); ++ } ++ if (sv[1] >= 0) { ++ closesocket(sv[1]); ++ } ++ return -1; ++} ++ ++static void fork_exec_child_setup(gpointer data) ++{ ++#ifndef _WIN32 ++ setsid(); ++ ++ /* Unblock all signals and leave our exec()-ee to block what it wants */ ++ sigset_t ss; ++ sigemptyset(&ss); ++ sigprocmask(SIG_SETMASK, &ss, NULL); ++ ++ /* POSIX is obnoxious about SIGCHLD specifically across exec() */ ++ signal(SIGCHLD, SIG_DFL); ++#endif ++} ++ ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ ++#if !GLIB_CHECK_VERSION(2, 58, 0) ++typedef struct SlirpGSpawnFds { ++ GSpawnChildSetupFunc child_setup; ++ gpointer user_data; ++ gint stdin_fd; ++ gint stdout_fd; ++ gint stderr_fd; ++} SlirpGSpawnFds; ++ ++static inline void slirp_gspawn_fds_setup(gpointer user_data) ++{ ++ SlirpGSpawnFds *q = (SlirpGSpawnFds *)user_data; ++ ++ dup2(q->stdin_fd, 0); ++ dup2(q->stdout_fd, 1); ++ 
dup2(q->stderr_fd, 2); ++ q->child_setup(q->user_data); ++} ++#endif ++ ++static inline gboolean ++g_spawn_async_with_fds_slirp(const gchar *working_directory, gchar **argv, ++ gchar **envp, GSpawnFlags flags, ++ GSpawnChildSetupFunc child_setup, ++ gpointer user_data, GPid *child_pid, gint stdin_fd, ++ gint stdout_fd, gint stderr_fd, GError **error) ++{ ++#if GLIB_CHECK_VERSION(2, 58, 0) ++ return g_spawn_async_with_fds(working_directory, argv, envp, flags, ++ child_setup, user_data, child_pid, stdin_fd, ++ stdout_fd, stderr_fd, error); ++#else ++ SlirpGSpawnFds setup = { ++ .child_setup = child_setup, ++ .user_data = user_data, ++ .stdin_fd = stdin_fd, ++ .stdout_fd = stdout_fd, ++ .stderr_fd = stderr_fd, ++ }; ++ ++ return g_spawn_async(working_directory, argv, envp, flags, ++ slirp_gspawn_fds_setup, &setup, child_pid, error); ++#endif ++} ++ ++#define g_spawn_async_with_fds(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) \ ++ g_spawn_async_with_fds_slirp(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) ++ ++#pragma GCC diagnostic pop ++ ++int fork_exec(struct socket *so, const char *ex) ++{ ++ GError *err = NULL; ++ gint argc = 0; ++ gchar **argv = NULL; ++ int opt, sp[2]; ++ ++ DEBUG_CALL("fork_exec"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("ex = %p", ex); ++ ++ if (slirp_socketpair_with_oob(sp) < 0) { ++ return 0; ++ } ++ ++ if (!g_shell_parse_argv(ex, &argc, &argv, &err)) { ++ g_critical("fork_exec invalid command: %s\nerror: %s", ex, err->message); ++ g_error_free(err); ++ return 0; ++ } ++ ++ g_spawn_async_with_fds(NULL /* cwd */, argv, NULL /* env */, ++ G_SPAWN_SEARCH_PATH, fork_exec_child_setup, ++ NULL /* data */, NULL /* child_pid */, sp[1], sp[1], ++ sp[1], &err); ++ g_strfreev(argv); ++ ++ if (err) { ++ g_critical("fork_exec: %s", err->message); ++ g_error_free(err); ++ closesocket(sp[0]); ++ closesocket(sp[1]); ++ return 0; ++ } ++ ++ so->s = sp[0]; ++ closesocket(sp[1]); ++ slirp_socket_set_fast_reuse(so->s); ++ opt = 1; ++ setsockopt(so->s, 
SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_set_nonblock(so->s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ return 1; ++} ++ ++int open_unix(struct socket *so, const char *unixpath) ++{ ++#ifdef G_OS_UNIX ++ struct sockaddr_un sa; ++ int s; ++ ++ DEBUG_CALL("open_unix"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("unixpath = %s", unixpath); ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.sun_family = AF_UNIX; ++ if (g_strlcpy(sa.sun_path, unixpath, sizeof(sa.sun_path)) >= sizeof(sa.sun_path)) { ++ g_critical("Bad unix path: %s", unixpath); ++ return 0; ++ } ++ ++ s = slirp_socket(PF_UNIX, SOCK_STREAM, 0); ++ if (s < 0) { ++ g_critical("open_unix(): %s", strerror(errno)); ++ return 0; ++ } ++ ++ if (connect(s, (struct sockaddr *)&sa, sizeof(sa)) < 0) { ++ g_critical("open_unix(): %s", strerror(errno)); ++ closesocket(s); ++ return 0; ++ } ++ ++ so->s = s; ++ slirp_set_nonblock(so->s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ ++ return 1; ++#else ++ g_assert_not_reached(); ++#endif ++} ++ ++char *slirp_connection_info(Slirp *slirp) ++{ ++ GString *str = g_string_new(NULL); ++ const char *const tcpstates[] = { ++ [TCPS_CLOSED] = "CLOSED", [TCPS_LISTEN] = "LISTEN", ++ [TCPS_SYN_SENT] = "SYN_SENT", [TCPS_SYN_RECEIVED] = "SYN_RCVD", ++ [TCPS_ESTABLISHED] = "ESTABLISHED", [TCPS_CLOSE_WAIT] = "CLOSE_WAIT", ++ [TCPS_FIN_WAIT_1] = "FIN_WAIT_1", [TCPS_CLOSING] = "CLOSING", ++ [TCPS_LAST_ACK] = "LAST_ACK", [TCPS_FIN_WAIT_2] = "FIN_WAIT_2", ++ [TCPS_TIME_WAIT] = "TIME_WAIT", ++ }; ++ struct in_addr dst_addr; ++ struct sockaddr_in src; ++ socklen_t src_len; ++ uint16_t dst_port; ++ struct socket *so; ++ const char *state; ++ char buf[20]; ++ ++ g_string_append_printf(str, ++ " Protocol[State] FD Source Address Port " ++ "Dest. 
Address Port RecvQ SendQ\n"); ++ ++ /* TODO: IPv6 */ ++ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { ++ if (so->so_state & SS_HOSTFWD) { ++ state = "HOST_FORWARD"; ++ } else if (so->so_tcpcb) { ++ state = tcpstates[so->so_tcpcb->t_state]; ++ } else { ++ state = "NONE"; ++ } ++ if (so->so_state & (SS_HOSTFWD | SS_INCOMING)) { ++ src_len = sizeof(src); ++ getsockname(so->s, (struct sockaddr *)&src, &src_len); ++ dst_addr = so->so_laddr; ++ dst_port = so->so_lport; ++ } else { ++ src.sin_addr = so->so_laddr; ++ src.sin_port = so->so_lport; ++ dst_addr = so->so_faddr; ++ dst_port = so->so_fport; ++ } ++ slirp_fmt0(buf, sizeof(buf), " TCP[%s]", state); ++ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, ++ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : ++ "*", ++ ntohs(src.sin_port)); ++ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), ++ ntohs(dst_port), so->so_rcv.sb_cc, ++ so->so_snd.sb_cc); ++ } ++ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so->so_next) { ++ if (so->so_state & SS_HOSTFWD) { ++ slirp_fmt0(buf, sizeof(buf), " UDP[HOST_FORWARD]"); ++ src_len = sizeof(src); ++ getsockname(so->s, (struct sockaddr *)&src, &src_len); ++ dst_addr = so->so_laddr; ++ dst_port = so->so_lport; ++ } else { ++ slirp_fmt0(buf, sizeof(buf), " UDP[%d sec]", ++ (so->so_expire - curtime) / 1000); ++ src.sin_addr = so->so_laddr; ++ src.sin_port = so->so_lport; ++ dst_addr = so->so_faddr; ++ dst_port = so->so_fport; ++ } ++ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, ++ src.sin_addr.s_addr ? 
inet_ntoa(src.sin_addr) : ++ "*", ++ ntohs(src.sin_port)); ++ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), ++ ntohs(dst_port), so->so_rcv.sb_cc, ++ so->so_snd.sb_cc); ++ } ++ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so->so_next) { ++ slirp_fmt0(buf, sizeof(buf), " ICMP[%d sec]", ++ (so->so_expire - curtime) / 1000); ++ src.sin_addr = so->so_laddr; ++ dst_addr = so->so_faddr; ++ g_string_append_printf(str, "%-19s %3d %15s - ", buf, so->s, ++ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : ++ "*"); ++ g_string_append_printf(str, "%15s - %5d %5d\n", inet_ntoa(dst_addr), ++ so->so_rcv.sb_cc, so->so_snd.sb_cc); ++ } ++ ++ return g_string_free(str, FALSE); ++} ++ ++char *slirp_neighbor_info(Slirp *slirp) ++{ ++ GString *str = g_string_new(NULL); ++ ArpTable *arp_table = &slirp->arp_table; ++ NdpTable *ndp_table = &slirp->ndp_table; ++ char ip_addr[INET6_ADDRSTRLEN]; ++ char eth_addr[ETH_ADDRSTRLEN]; ++ const char *ip; ++ ++ g_string_append_printf(str, " %5s %-17s %s\n", ++ "Table", "MacAddr", "IP Address"); ++ ++ for (int i = 0; i < ARP_TABLE_SIZE; ++i) { ++ struct in_addr addr; ++ addr.s_addr = arp_table->table[i].ar_sip; ++ if (!addr.s_addr) { ++ continue; ++ } ++ ip = inet_ntop(AF_INET, &addr, ip_addr, sizeof(ip_addr)); ++ g_assert(ip != NULL); ++ g_string_append_printf(str, " %5s %-17s %s\n", "ARP", ++ slirp_ether_ntoa(arp_table->table[i].ar_sha, ++ eth_addr, sizeof(eth_addr)), ++ ip); ++ } ++ ++ for (int i = 0; i < NDP_TABLE_SIZE; ++i) { ++ if (in6_zero(&ndp_table->table[i].ip_addr)) { ++ continue; ++ } ++ ip = inet_ntop(AF_INET6, &ndp_table->table[i].ip_addr, ip_addr, ++ sizeof(ip_addr)); ++ g_assert(ip != NULL); ++ g_string_append_printf(str, " %5s %-17s %s\n", "NDP", ++ slirp_ether_ntoa(ndp_table->table[i].eth_addr, ++ eth_addr, sizeof(eth_addr)), ++ ip); ++ } ++ ++ return g_string_free(str, FALSE); ++} ++ ++int slirp_bind_outbound(struct socket *so, unsigned short af) ++{ ++ int ret = 0; ++ struct sockaddr *addr = 
NULL; ++ int addr_size = 0; ++ ++ if (af == AF_INET && so->slirp->outbound_addr != NULL) { ++ addr = (struct sockaddr *)so->slirp->outbound_addr; ++ addr_size = sizeof(struct sockaddr_in); ++ } else if (af == AF_INET6 && so->slirp->outbound_addr6 != NULL) { ++ addr = (struct sockaddr *)so->slirp->outbound_addr6; ++ addr_size = sizeof(struct sockaddr_in6); ++ } ++ ++ if (addr != NULL) { ++ ret = bind(so->s, addr, addr_size); ++ } ++ return ret; ++} +diff --git a/slirp/src/misc.h b/slirp/src/misc.h +new file mode 100644 +index 0000000000..81b370cfb1 +--- /dev/null ++++ b/slirp/src/misc.h +@@ -0,0 +1,72 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef MISC_H ++#define MISC_H ++ ++#include "libslirp.h" ++ ++struct gfwd_list { ++ SlirpWriteCb write_cb; ++ void *opaque; ++ struct in_addr ex_addr; /* Server address */ ++ int ex_fport; /* Port to telnet to */ ++ char *ex_exec; /* Command line of what to exec */ ++ char *ex_unix; /* unix socket */ ++ struct gfwd_list *ex_next; ++}; ++ ++#define EMU_NONE 0x0 ++ ++/* TCP emulations */ ++#define EMU_CTL 0x1 ++#define EMU_FTP 0x2 ++#define EMU_KSH 0x3 ++#define EMU_IRC 0x4 ++#define EMU_REALAUDIO 0x5 ++#define EMU_RLOGIN 0x6 ++#define EMU_IDENT 0x7 ++ ++#define EMU_NOCONNECT 0x10 /* Don't connect */ ++ ++struct tos_t { ++ uint16_t lport; ++ uint16_t fport; ++ uint8_t tos; ++ uint8_t emu; ++}; ++ ++struct emu_t { ++ uint16_t lport; ++ uint16_t fport; ++ uint8_t tos; ++ uint8_t emu; ++ struct emu_t *next; ++}; ++ ++struct slirp_quehead { ++ struct slirp_quehead *qh_link; ++ struct slirp_quehead *qh_rlink; ++}; ++ ++void slirp_insque(void *, void *); ++void slirp_remque(void *); ++int fork_exec(struct socket *so, const char *ex); ++int open_unix(struct socket *so, const char *unixsock); ++ ++struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, ++ void *opaque, struct in_addr addr, int port); ++ ++struct gfwd_list *add_exec(struct 
gfwd_list **ex_ptr, const char *cmdline, ++ struct in_addr addr, int port); ++ ++struct gfwd_list *add_unix(struct gfwd_list **ex_ptr, const char *unixsock, ++ struct in_addr addr, int port); ++ ++int remove_guestfwd(struct gfwd_list **ex_ptr, struct in_addr addr, int port); ++ ++int slirp_bind_outbound(struct socket *so, unsigned short af); ++ ++#endif +diff --git a/slirp/src/ncsi-pkt.h b/slirp/src/ncsi-pkt.h +new file mode 100644 +index 0000000000..39cf8446d6 +--- /dev/null ++++ b/slirp/src/ncsi-pkt.h +@@ -0,0 +1,445 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright Gavin Shan, IBM Corporation 2016. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#ifndef NCSI_PKT_H ++#define NCSI_PKT_H ++ ++/* from linux/net/ncsi/ncsi-pkt.h */ ++#define __be32 uint32_t ++#define __be16 uint16_t ++ ++struct ncsi_pkt_hdr { ++ unsigned char mc_id; /* Management controller ID */ ++ unsigned char revision; /* NCSI version - 0x01 */ ++ unsigned char reserved; /* Reserved */ ++ unsigned char id; /* Packet sequence number */ ++ unsigned char type; /* Packet type */ ++ unsigned char channel; /* Network controller ID */ ++ __be16 length; /* Payload length */ ++ __be32 reserved1[2]; /* Reserved */ ++} SLIRP_PACKED; ++ ++struct ncsi_cmd_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++} SLIRP_PACKED; ++ ++struct ncsi_rsp_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++ __be16 code; /* Response code */ ++ __be16 reason; /* Response reason */ ++} SLIRP_PACKED; ++ ++struct ncsi_aen_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++ unsigned char reserved2[3]; /* Reserved */ ++ unsigned char type; /* AEN packet type */ ++} SLIRP_PACKED; ++ ++/* NCSI common command packet */ ++struct ncsi_cmd_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[26]; ++} SLIRP_PACKED; ++ ++struct ncsi_rsp_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Select Package 
*/ ++struct ncsi_cmd_sp_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char hw_arbitration; /* HW arbitration */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Disable Channel */ ++struct ncsi_cmd_dc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char ald; /* Allow link down */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Reset Channel */ ++struct ncsi_cmd_rc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 reserved; /* Reserved */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* AEN Enable */ ++struct ncsi_cmd_ae_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mc_id; /* MC ID */ ++ __be32 mode; /* AEN working mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++} SLIRP_PACKED; ++ ++/* Set Link */ ++struct ncsi_cmd_sl_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Link working mode */ ++ __be32 oem_mode; /* OEM link mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++} SLIRP_PACKED; ++ ++/* Set VLAN Filter */ ++struct ncsi_cmd_svf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be16 reserved; /* Reserved */ ++ __be16 vlan; /* VLAN ID */ ++ __be16 reserved1; /* Reserved */ ++ unsigned char index; /* VLAN table index */ ++ unsigned char enable; /* Enable or disable */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[14]; ++} SLIRP_PACKED; ++ ++/* Enable VLAN */ ++struct ncsi_cmd_ev_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mode; /* VLAN filter mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Set MAC Address */ 
++struct ncsi_cmd_sma_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char mac[6]; /* MAC address */ ++ unsigned char index; /* MAC table index */ ++ unsigned char at_e; /* Addr type and operation */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++} SLIRP_PACKED; ++ ++/* Enable Broadcast Filter */ ++struct ncsi_cmd_ebf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Filter mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Enable Global Multicast Filter */ ++struct ncsi_cmd_egmf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Global MC mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Set NCSI Flow Control */ ++struct ncsi_cmd_snfc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mode; /* Flow control mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Get Link Status */ ++struct ncsi_rsp_gls_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 status; /* Link status */ ++ __be32 other; /* Other indications */ ++ __be32 oem_status; /* OEM link status */ ++ __be32 checksum; ++ unsigned char pad[10]; ++} SLIRP_PACKED; ++ ++/* Get Version ID */ ++struct ncsi_rsp_gvi_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 ncsi_version; /* NCSI version */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char alpha2; /* NCSI version */ ++ unsigned char fw_name[12]; /* f/w name string */ ++ __be32 fw_version; /* f/w version */ ++ __be16 pci_ids[4]; /* PCI IDs */ ++ __be32 mf_id; /* Manufacture ID */ ++ __be32 checksum; ++} SLIRP_PACKED; ++ ++/* Get Capabilities */ ++struct ncsi_rsp_gc_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 cap; /* Capabilities */ ++ __be32 bc_cap; /* Broadcast cap */ ++ __be32 mc_cap; /* 
Multicast cap */ ++ __be32 buf_cap; /* Buffering cap */ ++ __be32 aen_cap; /* AEN cap */ ++ unsigned char vlan_cnt; /* VLAN filter count */ ++ unsigned char mixed_cnt; /* Mix filter count */ ++ unsigned char mc_cnt; /* MC filter count */ ++ unsigned char uc_cnt; /* UC filter count */ ++ unsigned char reserved[2]; /* Reserved */ ++ unsigned char vlan_mode; /* VLAN mode */ ++ unsigned char channel_cnt; /* Channel count */ ++ __be32 checksum; /* Checksum */ ++} SLIRP_PACKED; ++ ++/* Get Parameters */ ++struct ncsi_rsp_gp_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ unsigned char mac_cnt; /* Number of MAC addr */ ++ unsigned char reserved[2]; /* Reserved */ ++ unsigned char mac_enable; /* MAC addr enable flags */ ++ unsigned char vlan_cnt; /* VLAN tag count */ ++ unsigned char reserved1; /* Reserved */ ++ __be16 vlan_enable; /* VLAN tag enable flags */ ++ __be32 link_mode; /* Link setting */ ++ __be32 bc_mode; /* BC filter mode */ ++ __be32 valid_modes; /* Valid mode parameters */ ++ unsigned char vlan_mode; /* VLAN mode */ ++ unsigned char fc_mode; /* Flow control mode */ ++ unsigned char reserved2[2]; /* Reserved */ ++ __be32 aen_mode; /* AEN mode */ ++ unsigned char mac[6]; /* Supported MAC addr */ ++ __be16 vlan; /* Supported VLAN tags */ ++ __be32 checksum; /* Checksum */ ++} SLIRP_PACKED; ++ ++/* Get Controller Packet Statistics */ ++struct ncsi_rsp_gcps_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 cnt_hi; /* Counter cleared */ ++ __be32 cnt_lo; /* Counter cleared */ ++ __be32 rx_bytes; /* Rx bytes */ ++ __be32 tx_bytes; /* Tx bytes */ ++ __be32 rx_uc_pkts; /* Rx UC packets */ ++ __be32 rx_mc_pkts; /* Rx MC packets */ ++ __be32 rx_bc_pkts; /* Rx BC packets */ ++ __be32 tx_uc_pkts; /* Tx UC packets */ ++ __be32 tx_mc_pkts; /* Tx MC packets */ ++ __be32 tx_bc_pkts; /* Tx BC packets */ ++ __be32 fcs_err; /* FCS errors */ ++ __be32 align_err; /* Alignment errors */ ++ __be32 false_carrier; /* False carrier detection */ ++ 
__be32 runt_pkts; /* Rx runt packets */ ++ __be32 jabber_pkts; /* Rx jabber packets */ ++ __be32 rx_pause_xon; /* Rx pause XON frames */ ++ __be32 rx_pause_xoff; /* Rx XOFF frames */ ++ __be32 tx_pause_xon; /* Tx XON frames */ ++ __be32 tx_pause_xoff; /* Tx XOFF frames */ ++ __be32 tx_s_collision; /* Single collision frames */ ++ __be32 tx_m_collision; /* Multiple collision frames */ ++ __be32 l_collision; /* Late collision frames */ ++ __be32 e_collision; /* Excessive collision frames */ ++ __be32 rx_ctl_frames; /* Rx control frames */ ++ __be32 rx_64_frames; /* Rx 64-bytes frames */ ++ __be32 rx_127_frames; /* Rx 65-127 bytes frames */ ++ __be32 rx_255_frames; /* Rx 128-255 bytes frames */ ++ __be32 rx_511_frames; /* Rx 256-511 bytes frames */ ++ __be32 rx_1023_frames; /* Rx 512-1023 bytes frames */ ++ __be32 rx_1522_frames; /* Rx 1024-1522 bytes frames */ ++ __be32 rx_9022_frames; /* Rx 1523-9022 bytes frames */ ++ __be32 tx_64_frames; /* Tx 64-bytes frames */ ++ __be32 tx_127_frames; /* Tx 65-127 bytes frames */ ++ __be32 tx_255_frames; /* Tx 128-255 bytes frames */ ++ __be32 tx_511_frames; /* Tx 256-511 bytes frames */ ++ __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */ ++ __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */ ++ __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */ ++ __be32 rx_valid_bytes; /* Rx valid bytes */ ++ __be32 rx_runt_pkts; /* Rx error runt packets */ ++ __be32 rx_jabber_pkts; /* Rx error jabber packets */ ++ __be32 checksum; /* Checksum */ ++} SLIRP_PACKED; ++ ++/* Get NCSI Statistics */ ++struct ncsi_rsp_gns_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 rx_cmds; /* Rx NCSI commands */ ++ __be32 dropped_cmds; /* Dropped commands */ ++ __be32 cmd_type_errs; /* Command type errors */ ++ __be32 cmd_csum_errs; /* Command checksum errors */ ++ __be32 rx_pkts; /* Rx NCSI packets */ ++ __be32 tx_pkts; /* Tx NCSI packets */ ++ __be32 tx_aen_pkts; /* Tx AEN packets */ ++ __be32 checksum; /* Checksum */ ++} 
SLIRP_PACKED; ++ ++/* Get NCSI Pass-through Statistics */ ++struct ncsi_rsp_gnpts_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 tx_pkts; /* Tx packets */ ++ __be32 tx_dropped; /* Tx dropped packets */ ++ __be32 tx_channel_err; /* Tx channel errors */ ++ __be32 tx_us_err; /* Tx undersize errors */ ++ __be32 rx_pkts; /* Rx packets */ ++ __be32 rx_dropped; /* Rx dropped packets */ ++ __be32 rx_channel_err; /* Rx channel errors */ ++ __be32 rx_us_err; /* Rx undersize errors */ ++ __be32 rx_os_err; /* Rx oversize errors */ ++ __be32 checksum; /* Checksum */ ++} SLIRP_PACKED; ++ ++/* Get package status */ ++struct ncsi_rsp_gps_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 status; /* Hardware arbitration status */ ++ __be32 checksum; ++} SLIRP_PACKED; ++ ++/* Get package UUID */ ++struct ncsi_rsp_gpuuid_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ unsigned char uuid[16]; /* UUID */ ++ __be32 checksum; ++} SLIRP_PACKED; ++ ++/* AEN: Link State Change */ ++struct ncsi_aen_lsc_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 status; /* Link status */ ++ __be32 oem_status; /* OEM link status */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[14]; ++} SLIRP_PACKED; ++ ++/* AEN: Configuration Required */ ++struct ncsi_aen_cr_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* AEN: Host Network Controller Driver Status Change */ ++struct ncsi_aen_hncdsc_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 status; /* Status */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++} SLIRP_PACKED; ++ ++/* NCSI packet revision */ ++#define NCSI_PKT_REVISION 0x01 ++ ++/* NCSI packet commands */ ++#define NCSI_PKT_CMD_CIS 0x00 /* Clear Initial State */ ++#define NCSI_PKT_CMD_SP 0x01 /* Select Package */ ++#define NCSI_PKT_CMD_DP 0x02 /* Deselect Package */ ++#define NCSI_PKT_CMD_EC 0x03 
/* Enable Channel */ ++#define NCSI_PKT_CMD_DC 0x04 /* Disable Channel */ ++#define NCSI_PKT_CMD_RC 0x05 /* Reset Channel */ ++#define NCSI_PKT_CMD_ECNT 0x06 /* Enable Channel Network Tx */ ++#define NCSI_PKT_CMD_DCNT 0x07 /* Disable Channel Network Tx */ ++#define NCSI_PKT_CMD_AE 0x08 /* AEN Enable */ ++#define NCSI_PKT_CMD_SL 0x09 /* Set Link */ ++#define NCSI_PKT_CMD_GLS 0x0a /* Get Link */ ++#define NCSI_PKT_CMD_SVF 0x0b /* Set VLAN Filter */ ++#define NCSI_PKT_CMD_EV 0x0c /* Enable VLAN */ ++#define NCSI_PKT_CMD_DV 0x0d /* Disable VLAN */ ++#define NCSI_PKT_CMD_SMA 0x0e /* Set MAC address */ ++#define NCSI_PKT_CMD_EBF 0x10 /* Enable Broadcast Filter */ ++#define NCSI_PKT_CMD_DBF 0x11 /* Disable Broadcast Filter */ ++#define NCSI_PKT_CMD_EGMF 0x12 /* Enable Global Multicast Filter */ ++#define NCSI_PKT_CMD_DGMF 0x13 /* Disable Global Multicast Filter */ ++#define NCSI_PKT_CMD_SNFC 0x14 /* Set NCSI Flow Control */ ++#define NCSI_PKT_CMD_GVI 0x15 /* Get Version ID */ ++#define NCSI_PKT_CMD_GC 0x16 /* Get Capabilities */ ++#define NCSI_PKT_CMD_GP 0x17 /* Get Parameters */ ++#define NCSI_PKT_CMD_GCPS 0x18 /* Get Controller Packet Statistics */ ++#define NCSI_PKT_CMD_GNS 0x19 /* Get NCSI Statistics */ ++#define NCSI_PKT_CMD_GNPTS 0x1a /* Get NCSI Pass-throu Statistics */ ++#define NCSI_PKT_CMD_GPS 0x1b /* Get package status */ ++#define NCSI_PKT_CMD_OEM 0x50 /* OEM */ ++#define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */ ++#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ ++ ++/* NCSI packet responses */ ++#define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80) ++#define NCSI_PKT_RSP_SP (NCSI_PKT_CMD_SP + 0x80) ++#define NCSI_PKT_RSP_DP (NCSI_PKT_CMD_DP + 0x80) ++#define NCSI_PKT_RSP_EC (NCSI_PKT_CMD_EC + 0x80) ++#define NCSI_PKT_RSP_DC (NCSI_PKT_CMD_DC + 0x80) ++#define NCSI_PKT_RSP_RC (NCSI_PKT_CMD_RC + 0x80) ++#define NCSI_PKT_RSP_ECNT (NCSI_PKT_CMD_ECNT + 0x80) ++#define NCSI_PKT_RSP_DCNT (NCSI_PKT_CMD_DCNT + 0x80) ++#define NCSI_PKT_RSP_AE 
(NCSI_PKT_CMD_AE + 0x80) ++#define NCSI_PKT_RSP_SL (NCSI_PKT_CMD_SL + 0x80) ++#define NCSI_PKT_RSP_GLS (NCSI_PKT_CMD_GLS + 0x80) ++#define NCSI_PKT_RSP_SVF (NCSI_PKT_CMD_SVF + 0x80) ++#define NCSI_PKT_RSP_EV (NCSI_PKT_CMD_EV + 0x80) ++#define NCSI_PKT_RSP_DV (NCSI_PKT_CMD_DV + 0x80) ++#define NCSI_PKT_RSP_SMA (NCSI_PKT_CMD_SMA + 0x80) ++#define NCSI_PKT_RSP_EBF (NCSI_PKT_CMD_EBF + 0x80) ++#define NCSI_PKT_RSP_DBF (NCSI_PKT_CMD_DBF + 0x80) ++#define NCSI_PKT_RSP_EGMF (NCSI_PKT_CMD_EGMF + 0x80) ++#define NCSI_PKT_RSP_DGMF (NCSI_PKT_CMD_DGMF + 0x80) ++#define NCSI_PKT_RSP_SNFC (NCSI_PKT_CMD_SNFC + 0x80) ++#define NCSI_PKT_RSP_GVI (NCSI_PKT_CMD_GVI + 0x80) ++#define NCSI_PKT_RSP_GC (NCSI_PKT_CMD_GC + 0x80) ++#define NCSI_PKT_RSP_GP (NCSI_PKT_CMD_GP + 0x80) ++#define NCSI_PKT_RSP_GCPS (NCSI_PKT_CMD_GCPS + 0x80) ++#define NCSI_PKT_RSP_GNS (NCSI_PKT_CMD_GNS + 0x80) ++#define NCSI_PKT_RSP_GNPTS (NCSI_PKT_CMD_GNPTS + 0x80) ++#define NCSI_PKT_RSP_GPS (NCSI_PKT_CMD_GPS + 0x80) ++#define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80) ++#define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80) ++#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80) ++ ++/* NCSI response code/reason */ ++#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ ++#define NCSI_PKT_RSP_C_FAILED 0x0001 /* Command Failed */ ++#define NCSI_PKT_RSP_C_UNAVAILABLE 0x0002 /* Command Unavailable */ ++#define NCSI_PKT_RSP_C_UNSUPPORTED 0x0003 /* Command Unsupported */ ++#define NCSI_PKT_RSP_R_NO_ERROR 0x0000 /* No Error */ ++#define NCSI_PKT_RSP_R_INTERFACE 0x0001 /* Interface not ready */ ++#define NCSI_PKT_RSP_R_PARAM 0x0002 /* Invalid Parameter */ ++#define NCSI_PKT_RSP_R_CHANNEL 0x0003 /* Channel not Ready */ ++#define NCSI_PKT_RSP_R_PACKAGE 0x0004 /* Package not Ready */ ++#define NCSI_PKT_RSP_R_LENGTH 0x0005 /* Invalid payload length */ ++#define NCSI_PKT_RSP_R_UNKNOWN 0x7fff /* Command type unsupported */ ++ ++/* NCSI AEN packet type */ ++#define NCSI_PKT_AEN 0xFF /* AEN Packet */ ++#define 
NCSI_PKT_AEN_LSC 0x00 /* Link status change */ ++#define NCSI_PKT_AEN_CR 0x01 /* Configuration required */ ++#define NCSI_PKT_AEN_HNCDSC 0x02 /* HNC driver status change */ ++ ++#endif /* NCSI_PKT_H */ +diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c +new file mode 100644 +index 0000000000..f3427bd66d +--- /dev/null ++++ b/slirp/src/ncsi.c +@@ -0,0 +1,197 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * NC-SI (Network Controller Sideband Interface) "echo" model ++ * ++ * Copyright (C) 2016-2018 IBM Corp. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
++ * OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#include "slirp.h"
++
++#include "ncsi-pkt.h"
++
++/* Two's-complement checksum over the first len bytes of data (len even). */
++static uint32_t ncsi_calculate_checksum(uint8_t *data, int len)
++{
++    uint32_t checksum = 0;
++    int i;
++
++    /*
++     * 32-bit unsigned sum of the buffer read as big-endian 16-bit words.
++     */
++    for (i = 0; i < len; i += 2) {
++        checksum += (((uint16_t) data[i]) << 8) + data[i+1];
++    }
++
++    checksum = (~checksum + 1);
++    return checksum;
++}
++
++/* Get Capabilities: every capability word is all-ones, uc_cnt is 2 */
++static int ncsi_rsp_handler_gc(struct ncsi_rsp_pkt_hdr *rnh)
++{
++    struct ncsi_rsp_gc_pkt *rsp = (struct ncsi_rsp_gc_pkt *)rnh;
++
++    rsp->cap = htonl(~0);
++    rsp->bc_cap = htonl(~0);
++    rsp->mc_cap = htonl(~0);
++    rsp->buf_cap = htonl(~0);
++    rsp->aen_cap = htonl(~0);
++    rsp->vlan_mode = 0xff;
++    rsp->uc_cnt = 2;
++    return 0;
++}
++
++/* Get Link Status: only the status word is filled in (0x1) */
++static int ncsi_rsp_handler_gls(struct ncsi_rsp_pkt_hdr *rnh)
++{
++    struct ncsi_rsp_gls_pkt *rsp = (struct ncsi_rsp_gls_pkt *)rnh;
++
++    rsp->status = htonl(0x1);
++    return 0;
++}
++
++/* Get Parameters: the reply buffer is pre-zeroed, counts stay at 0 */
++static int ncsi_rsp_handler_gp(struct ncsi_rsp_pkt_hdr *rnh)
++{
++    struct ncsi_rsp_gp_pkt *rsp = (struct ncsi_rsp_gp_pkt *)rnh;
++
++    /* no MAC address filters or VLAN filters on the channel */
++    rsp->mac_cnt = 0;
++    rsp->mac_enable = 0;
++    rsp->vlan_cnt = 0;
++    rsp->vlan_enable = 0;
++
++    return 0;
++}
++
++static const struct ncsi_rsp_handler {
++    unsigned char type; /* response type, i.e. command type + 0x80 */
++    int payload; /* bytes that follow the common header in the reply */
++    int (*handler)(struct ncsi_rsp_pkt_hdr *rnh); /* optional, may be NULL */
++} ncsi_rsp_handlers[] = { { NCSI_PKT_RSP_CIS, 4, NULL },
++                          { NCSI_PKT_RSP_SP, 4, NULL },
++                          { NCSI_PKT_RSP_DP, 4, NULL },
++                          { NCSI_PKT_RSP_EC, 4, NULL },
++                          { NCSI_PKT_RSP_DC, 4, NULL },
++                          { NCSI_PKT_RSP_RC, 4, NULL },
++                          { NCSI_PKT_RSP_ECNT, 4, NULL },
++                          { NCSI_PKT_RSP_DCNT, 4, NULL },
++                          { NCSI_PKT_RSP_AE, 4, NULL },
++                          { NCSI_PKT_RSP_SL, 4, NULL },
++                          { NCSI_PKT_RSP_GLS, 16, ncsi_rsp_handler_gls },
++                          { NCSI_PKT_RSP_SVF, 4, NULL },
++                          { NCSI_PKT_RSP_EV, 4, NULL },
++                          { NCSI_PKT_RSP_DV, 4, NULL },
++                          { NCSI_PKT_RSP_SMA, 4, NULL },
++                          { NCSI_PKT_RSP_EBF, 4, NULL },
++                          { NCSI_PKT_RSP_DBF, 4, NULL },
++                          { NCSI_PKT_RSP_EGMF, 4, NULL },
++                          { NCSI_PKT_RSP_DGMF, 4, NULL },
++                          { NCSI_PKT_RSP_SNFC, 4, NULL },
++                          { NCSI_PKT_RSP_GVI, 40, NULL },
++                          { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc },
++                          { NCSI_PKT_RSP_GP, 40, ncsi_rsp_handler_gp },
++                          { NCSI_PKT_RSP_GCPS, 172, NULL },
++                          { NCSI_PKT_RSP_GNS, 172, NULL },
++                          { NCSI_PKT_RSP_GNPTS, 172, NULL },
++                          { NCSI_PKT_RSP_GPS, 8, NULL },
++                          { NCSI_PKT_RSP_OEM, 0, NULL },
++                          { NCSI_PKT_RSP_PLDM, 0, NULL },
++                          { NCSI_PKT_RSP_GPUUID, 20, NULL } };
++
++/*
++ * reply format : ncsi header + payload (at most 172 bytes) + 4-byte checksum
++ */
++#define NCSI_MAX_PAYLOAD 172
++#define NCSI_MAX_LEN (sizeof(struct ncsi_pkt_hdr) + NCSI_MAX_PAYLOAD + 4)
++
++/* Answer one NC-SI command frame (pkt, pkt_len bytes) with a canned reply. */
++void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
++{
++    const struct ncsi_pkt_hdr *nh =
++        (const struct ncsi_pkt_hdr *)(pkt + ETH_HLEN);
++    uint8_t ncsi_reply[ETH_HLEN + NCSI_MAX_LEN];
++    struct ethhdr *reh = (struct ethhdr *)ncsi_reply;
++    struct ncsi_rsp_pkt_hdr *rnh =
++        (struct ncsi_rsp_pkt_hdr *)(ncsi_reply + ETH_HLEN);
++    const struct ncsi_rsp_handler *handler = NULL;
++    int i, ncsi_rsp_len = sizeof(*nh);
++    uint32_t checksum;
++
++    if (pkt_len < (int)(ETH_HLEN + sizeof(struct ncsi_pkt_hdr))) {
++        return; /* packet too short (a negative length is rejected too) */
++    }
++
++    memset(ncsi_reply, 0, sizeof(ncsi_reply));
++
++    memset(reh->h_dest, 0xff, ETH_ALEN);
++    memset(reh->h_source, 0xff, ETH_ALEN);
++    reh->h_proto = htons(ETH_P_NCSI);
++
++    for (i = 0; i < G_N_ELEMENTS(ncsi_rsp_handlers); i++) {
++        if (ncsi_rsp_handlers[i].type == nh->type + 0x80) {
++            handler = &ncsi_rsp_handlers[i];
++            break;
++        }
++    }
++
++    rnh->common.mc_id = nh->mc_id;
++    rnh->common.revision = NCSI_PKT_REVISION;
++    rnh->common.id = nh->id;
++    rnh->common.type = nh->type + 0x80;
++    rnh->common.channel = nh->channel;
++
++    if (handler) {
++        rnh->common.length = htons(handler->payload);
++        rnh->code = htons(NCSI_PKT_RSP_C_COMPLETED);
++        rnh->reason = htons(NCSI_PKT_RSP_R_NO_ERROR);
++
++        if (handler->handler) {
++            /* TODO: handle errors */
++            handler->handler(rnh);
++        }
++        ncsi_rsp_len += handler->payload;
++    } else {
++        rnh->common.length = 0;
++        rnh->code = htons(NCSI_PKT_RSP_C_UNAVAILABLE);
++        rnh->reason = htons(NCSI_PKT_RSP_R_UNKNOWN);
++    }
++
++    /*
++     * Optional trailing checksum; its offset may be unaligned, so memcpy it.
++     */
++    checksum = htonl(ncsi_calculate_checksum((uint8_t *)rnh, ncsi_rsp_len));
++    memcpy((uint8_t *)rnh + ncsi_rsp_len, &checksum, sizeof(checksum));
++    ncsi_rsp_len += 4;
++
++    slirp_send_packet_all(slirp, ncsi_reply, ETH_HLEN + ncsi_rsp_len);
++}
+diff --git a/slirp/src/ndp_table.c b/slirp/src/ndp_table.c
+new file mode 100644
+index 0000000000..fdb189d595
+--- /dev/null
++++ b/slirp/src/ndp_table.c
+@@ -0,0 +1,98 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 2013
++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne.
++ */
++
++#include "slirp.h"
++
++/* Record or refresh the ip_addr -> ethaddr mapping in slirp's NDP table. */
++void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr,
++                   uint8_t ethaddr[ETH_ALEN])
++{
++    char ip_str[INET6_ADDRSTRLEN], mac_str[ETH_ADDRSTRLEN];
++    NdpTable *tbl = &slirp->ndp_table;
++    int slot;
++
++    inet_ntop(AF_INET6, &(ip_addr), ip_str, INET6_ADDRSTRLEN);
++
++    DEBUG_CALL("ndp_table_add");
++    DEBUG_ARG("ip = %s", ip_str);
++    DEBUG_ARG("hw addr = %s",
++              slirp_ether_ntoa(ethaddr, mac_str, sizeof(mac_str)));
++
++    if (IN6_IS_ADDR_MULTICAST(&ip_addr) || in6_zero(&ip_addr)) {
++        /* Multicast and unspecified addresses never get a table slot */
++        DEBUG_CALL(" abort: do not register multicast or unspecified address");
++        return;
++    }
++
++    /* A known address only has its MAC refreshed in place */
++    for (slot = 0; slot < NDP_TABLE_SIZE; slot++) {
++        if (!in6_equal(&tbl->table[slot].ip_addr, &ip_addr)) {
++            continue;
++        }
++        DEBUG_CALL(" already in table: update the entry");
++        memcpy(tbl->table[slot].eth_addr, ethaddr, ETH_ALEN);
++        return;
++    }
++
++    /* Unknown address: it takes the slot at next_victim */
++    DEBUG_CALL(" create new entry");
++    /* The first address ever stored is remembered as the guest's. */
++    if (in6_zero(&tbl->guest_in6_addr)) {
++        tbl->guest_in6_addr = ip_addr;
++    }
++    tbl->table[tbl->next_victim].ip_addr = ip_addr;
++    memcpy(tbl->table[tbl->next_victim].eth_addr, ethaddr, ETH_ALEN);
++    /* Advance the replacement cursor, wrapping at the table size */
++    tbl->next_victim = (tbl->next_victim + 1) % NDP_TABLE_SIZE;
++}
++
++/* Resolve ip_addr into out_ethaddr; returns false when it is not known. */
++bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr,
++                      uint8_t out_ethaddr[ETH_ALEN])
++{
++    char ip_str[INET6_ADDRSTRLEN], mac_str[ETH_ADDRSTRLEN];
++    NdpTable *tbl = &slirp->ndp_table;
++    int slot;
++
++    inet_ntop(AF_INET6, &(ip_addr), ip_str, INET6_ADDRSTRLEN);
++
++    DEBUG_CALL("ndp_table_search");
++    DEBUG_ARG("ip = %s", ip_str);
++
++    /* The unspecified address ... */
++    if (in6_zero(&ip_addr)) {
++        /* ... resolves to the Ethernet broadcast address */
++        memset(out_ethaddr, 0xff, ETH_ALEN);
++        return true;
++    }
++
++    /*
++     * Multicast address: the MAC is 33:33 followed by the last four
++     * bytes of the IPv6 address, e.g.
++     * ff02::abcd:ef12 -> 33:33:ab:cd:ef:12
++     */
++    if (IN6_IS_ADDR_MULTICAST(&ip_addr)) {
++        out_ethaddr[0] = 0x33;
++        out_ethaddr[1] = 0x33;
++        memcpy(&out_ethaddr[2], &ip_addr.s6_addr[12], 4);
++        DEBUG_ARG("multicast addr = %s",
++                  slirp_ether_ntoa(out_ethaddr, mac_str, sizeof(mac_str)));
++        return true;
++    }
++
++    for (slot = 0; slot < NDP_TABLE_SIZE; slot++) {
++        if (!in6_equal(&tbl->table[slot].ip_addr, &ip_addr)) {
++            continue;
++        }
++        memcpy(out_ethaddr, tbl->table[slot].eth_addr, ETH_ALEN);
++        DEBUG_ARG("found hw addr = %s",
++                  slirp_ether_ntoa(out_ethaddr, mac_str, sizeof(mac_str)));
++        return true;
++    }
++
++    DEBUG_CALL(" ip not found in table");
++    return false;
++}
+diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c
+new file mode 100644
+index 0000000000..b357091705
+--- /dev/null
++++ b/slirp/src/sbuf.c
+@@ -0,0 +1,168 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 1995 Danny Gasparovski.
++ */
++
++#include "slirp.h"
++
++static void sbappendsb(struct sbuf *sb, struct mbuf *m);
++
++void sbfree(struct sbuf *sb)
++{
++    g_free(sb->sb_data); /* only the data area is released, not *sb */
++}
++
++bool sbdrop(struct sbuf *sb, size_t num)
++{
++    int limit = sb->sb_datalen / 2; /* half of the buffer size */
++
++    g_warn_if_fail(num <= sb->sb_cc);
++    if (num > sb->sb_cc) /* never drop more than is buffered */
++        num = sb->sb_cc;
++
++    sb->sb_cc -= num;
++    sb->sb_rptr += num;
++    if (sb->sb_rptr >= sb->sb_data + sb->sb_datalen) /* wrap around */
++        sb->sb_rptr -= sb->sb_datalen;
++
++    if (sb->sb_cc < limit && sb->sb_cc + num >= limit) { /* fell below half */
++        return true;
++    }
++
++    return false;
++}
++
++void sbreserve(struct sbuf *sb, size_t size)
++{
++    sb->sb_wptr = sb->sb_rptr = sb->sb_data = g_realloc(sb->sb_data, size);
++    sb->sb_cc = 0; /* the buffer counts as empty from here on */
++    sb->sb_datalen = size;
++}
++
++/*
++ * Try to send the mbuf straight to the socket; whatever doesn't get
++ * written is appended to so->so_rcv... for a host with a fast net connection,
++ * this prevents an unnecessary copy of the data
++ * (the socket is non-blocking, so we won't hang).  m is always freed here.
++ */
++void sbappend(struct socket *so, struct mbuf *m)
++{
++    int ret = 0;
++
++    DEBUG_CALL("sbappend");
++    DEBUG_ARG("so = %p", so);
++    DEBUG_ARG("m = %p", m);
++    DEBUG_ARG("m->m_len = %d", m->m_len);
++
++    /* Shouldn't happen, but... e.g. foreign host closes connection */
++    if (m->m_len <= 0) {
++        m_free(m);
++        return;
++    }
++
++    /*
++     * If there is urgent data, buffer everything and call sosendoob;
++     * if not all was sent, sowrite will take care of the rest
++     * (The rest of this function is just an optimisation)
++     */
++    if (so->so_urgc) {
++        sbappendsb(&so->so_rcv, m);
++        m_free(m);
++        sosendoob(so);
++        return;
++    }
++
++    /*
++     * We only write if there's nothing in the buffer,
++     * otherwise it'll arrive out of order, and hence corrupt
++     */
++    if (!so->so_rcv.sb_cc)
++        ret = slirp_send(so, m->m_data, m->m_len, 0);
++
++    if (ret <= 0) {
++        /*
++         * Nothing was written (or no write was attempted at all).
++         * It's possible that the socket has closed, but
++         * we don't need to check because if it has closed,
++         * it will be detected in the normal way by soread()
++         */
++        sbappendsb(&so->so_rcv, m);
++    } else if (ret != m->m_len) {
++        /*
++         * Something was written, but not everything:
++         * skip the sent bytes and sbappendsb the rest
++         */
++        m->m_len -= ret;
++        m->m_data += ret;
++        sbappendsb(&so->so_rcv, m);
++    } /* else */
++    /* Whatever happened, we free the mbuf */
++    m_free(m);
++}
++
++/*
++ * Copy the data from m into the ring buffer sb, wrapping at its right edge.
++ * The caller is responsible for making sure there's enough room
++ */
++static void sbappendsb(struct sbuf *sb, struct mbuf *m)
++{
++    int len, n, nn;
++
++    len = m->m_len;
++
++    if (sb->sb_wptr < sb->sb_rptr) {
++        n = sb->sb_rptr - sb->sb_wptr; /* free gap between wptr and rptr */
++        if (n > len)
++            n = len;
++        memcpy(sb->sb_wptr, m->m_data, n);
++    } else {
++        /* Do the right edge first */
++        n = sb->sb_data + sb->sb_datalen - sb->sb_wptr;
++        if (n > len)
++            n = len;
++        memcpy(sb->sb_wptr, m->m_data, n);
++        len -= n;
++        if (len) {
++            /* Now the left edge, up to (not including) rptr */
++            nn = sb->sb_rptr - sb->sb_data;
++            if (nn > len)
++                nn = len;
++            memcpy(sb->sb_data, m->m_data + n, nn);
++            n += nn;
++        }
++    }
++
++    sb->sb_cc += n; /* n is the number of bytes actually copied */
++    sb->sb_wptr += n;
++    if (sb->sb_wptr >= sb->sb_data + sb->sb_datalen)
++        sb->sb_wptr -= sb->sb_datalen;
++}
++
++/*
++ * Copy len bytes, starting off bytes past rptr, to the straight buffer to.
++ * Don't update the sbuf rptr, this will be
++ * done in sbdrop when the data is acked
++ */
++void sbcopy(struct sbuf *sb, size_t off, size_t len, char *to)
++{
++    char *from;
++
++    g_assert(len + off <= sb->sb_cc);
++
++    from = sb->sb_rptr + off;
++    if (from >= sb->sb_data + sb->sb_datalen) /* wrap around */
++        from -= sb->sb_datalen;
++
++    if (from < sb->sb_wptr) {
++        memcpy(to, from, len);
++    } else {
++        /* re-use off: it now holds the byte count up to the right edge */
++        off = (sb->sb_data + sb->sb_datalen) - from;
++        if (off > len)
++            off = len;
++        memcpy(to, from, off);
++        len -= off;
++        if (len)
++            memcpy(to + off, sb->sb_data, len);
++    }
++}
+diff --git a/slirp/src/sbuf.h b/slirp/src/sbuf.h
+new file mode 100644
+index 0000000000..01886fbd01
+--- /dev/null
++++ b/slirp/src/sbuf.h
+@@ -0,0 +1,27 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 1995 Danny Gasparovski.
++ */
++
++#ifndef SBUF_H
++#define SBUF_H
++
++#define sbspace(sb) ((sb)->sb_datalen - (sb)->sb_cc) /* free bytes */
++
++struct sbuf {
++    uint32_t sb_cc; /* actual chars in buffer */
++    uint32_t sb_datalen; /* Size of the sb_data area */
++    char *sb_wptr; /* write pointer. points to where the next
++                    * bytes should be written in the sbuf */
++    char *sb_rptr; /* read pointer. points to where the next
++                    * byte should be read from the sbuf */
++    char *sb_data; /* Start of the data area */
++};
++
++void sbfree(struct sbuf *sb);
++bool sbdrop(struct sbuf *sb, size_t len);
++void sbreserve(struct sbuf *sb, size_t size);
++void sbappend(struct socket *sb, struct mbuf *mb);
++void sbcopy(struct sbuf *sb, size_t off, size_t len, char *p);
++
++#endif
+diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c
+new file mode 100644
+index 0000000000..9d3fee3e97
+--- /dev/null
++++ b/slirp/src/slirp.c
+@@ -0,0 +1,1387 @@
++/* SPDX-License-Identifier: MIT */
++/*
++ * libslirp glue
++ *
++ * Copyright (c) 2004-2008 Fabrice Bellard
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++ * copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
++ * THE SOFTWARE.
++ */
++#include "slirp.h"
++
++
++#ifndef _WIN32
++#include <net/if.h>
++#endif
++
++/* https://gitlab.freedesktop.org/slirp/libslirp/issues/18 */
++#if defined(__NetBSD__) && defined(if_mtu)
++#undef if_mtu
++#endif
++
++int slirp_debug;
++
++/* Define to 1 if you want KEEPALIVE timers */
++bool slirp_do_keepalive;
++
++/* host loopback address */
++struct in_addr loopback_addr;
++/* host loopback network mask */
++unsigned long loopback_mask;
++
++/* emulated hosts use the MAC addr 52:55:IP:IP:IP:IP */
++static const uint8_t special_ethaddr[ETH_ALEN] = { 0x52, 0x55, 0x00,
++                                                   0x00, 0x00, 0x00 };
++
++unsigned curtime;
++
++static struct in_addr dns_addr;
++#ifndef _WIN32
++static struct in6_addr dns6_addr;
++#endif
++static unsigned dns_addr_time;
++#ifndef _WIN32
++static unsigned dns6_addr_time;
++#endif
++
++#define TIMEOUT_FAST 2 /* milliseconds */
++#define TIMEOUT_SLOW 499 /* milliseconds */
++/* for the aging of certain requests like DNS */
++#define TIMEOUT_DEFAULT 1000 /* milliseconds */
++
++#if defined(_WIN32)
++
++int get_dns_addr(struct in_addr *pdns_addr)
++{
++    FIXED_INFO *FixedInfo = NULL;
++    ULONG BufLen;
++    DWORD ret;
++    IP_ADDR_STRING *pIPAddr;
++    struct in_addr tmp_addr;
++
++    if (dns_addr.s_addr != 0 && (curtime - dns_addr_time) < TIMEOUT_DEFAULT) {
++        *pdns_addr = dns_addr;
++        return 0;
++    }
++
++    FixedInfo = (FIXED_INFO *)GlobalAlloc(GPTR, sizeof(FIXED_INFO));
++    BufLen = sizeof(FIXED_INFO);
++
++    if (ERROR_BUFFER_OVERFLOW == GetNetworkParams(FixedInfo, &BufLen)) {
++        if (FixedInfo) {
++            GlobalFree(FixedInfo);
++            FixedInfo = NULL;
++        }
++        FixedInfo = GlobalAlloc(GPTR, BufLen);
++    }
++
++    if ((ret = GetNetworkParams(FixedInfo, &BufLen)) != ERROR_SUCCESS) {
++        printf("GetNetworkParams failed. ret = %08x\n", (unsigned)ret);
++        if (FixedInfo) {
++            GlobalFree(FixedInfo);
++            FixedInfo = NULL;
++        }
++        return -1;
++    }
++
++    pIPAddr = &(FixedInfo->DnsServerList);
++    inet_aton(pIPAddr->IpAddress.String, &tmp_addr);
++    *pdns_addr = tmp_addr;
++    dns_addr = tmp_addr;
++    dns_addr_time = curtime;
++    if (FixedInfo) {
++        GlobalFree(FixedInfo);
++        FixedInfo = NULL;
++    }
++    return 0;
++}
++
++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id)
++{
++    return -1;
++}
++
++static void winsock_cleanup(void)
++{
++    WSACleanup();
++}
++
++#elif defined(__APPLE__)
++
++#include <resolv.h>
++
++static int get_dns_addr_cached(void *pdns_addr, void *cached_addr,
++                               socklen_t addrlen, unsigned *cached_time)
++{
++    if (curtime - *cached_time < TIMEOUT_DEFAULT) {
++        memcpy(pdns_addr, cached_addr, addrlen);
++        return 0;
++    }
++    return 1; /* cache too old: the caller has to look the address up */
++}
++
++/* Return (and cache) the first libresolv name server whose family is af. */
++static int get_dns_addr_libresolv(int af, void *pdns_addr, void *cached_addr,
++                                  socklen_t addrlen, uint32_t *scope_id,
++                                  unsigned *cached_time)
++{
++    struct __res_state state;
++    union res_sockaddr_union servers[NI_MAXSERV];
++    int count, found;
++
++    if (res_ninit(&state) != 0) {
++        return -1;
++    }
++
++    count = res_getservers(&state, servers, NI_MAXSERV);
++    found = 0;
++    DEBUG_MISC("IP address of your DNS(s):");
++    for (int i = 0; i < count; i++) {
++        int family = servers[i].sin.sin_family;
++        /* sin and sin6 share storage: read the member of this family */
++        const void *addr = family == AF_INET6 ?
++            (const void *)&servers[i].sin6.sin6_addr :
++            (const void *)&servers[i].sin.sin_addr;
++
++        /* use the first entry of the requested family, and only that one */
++        if (family == af && ++found == 1) {
++            memcpy(pdns_addr, addr, addrlen);
++            memcpy(cached_addr, addr, addrlen);
++            if (scope_id) {
++                *scope_id = 0;
++            }
++            *cached_time = curtime;
++        }
++
++        if (found > 3) {
++            DEBUG_MISC(" (more)");
++            break;
++        } else if (slirp_debug & DBG_MISC) {
++            char s[INET6_ADDRSTRLEN];
++            const char *res = inet_ntop(family, addr, s, sizeof(s));
++
++            if (!res) {
++                res = " (string conversion error)";
++            }
++            DEBUG_MISC(" %s", res);
++        }
++    }
++
++    res_nclose(&state);
++    if (!found)
++        return -1;
++    return 0;
++}
++
++int get_dns_addr(struct in_addr *pdns_addr)
++{
++    if (dns_addr.s_addr != 0) {
++        int ret;
++        ret = get_dns_addr_cached(pdns_addr, &dns_addr, sizeof(dns_addr),
++                                  &dns_addr_time);
++        if (ret <= 0) {
++            return ret;
++        }
++    }
++    return get_dns_addr_libresolv(AF_INET, pdns_addr, &dns_addr,
++                                  sizeof(dns_addr), NULL, &dns_addr_time);
++}
++
++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id)
++{
++    if (!in6_zero(&dns6_addr)) {
++        int ret;
++        ret = get_dns_addr_cached(pdns6_addr, &dns6_addr, sizeof(dns6_addr),
++                                  &dns6_addr_time);
++        if (ret <= 0) {
++            return ret;
++        }
++    }
++    return get_dns_addr_libresolv(AF_INET6, pdns6_addr, &dns6_addr,
++                                  sizeof(dns6_addr), scope_id, &dns6_addr_time);
++}
++
++#else // !defined(_WIN32) && !defined(__APPLE__)
++
++#if defined(__HAIKU__)
++#define RESOLV_CONF_PATH "/boot/system/settings/network/resolv.conf"
++#else
++#define RESOLV_CONF_PATH "/etc/resolv.conf"
++#endif
++
++static int get_dns_addr_cached(void *pdns_addr, void *cached_addr,
++                               socklen_t addrlen, struct stat *cached_stat,
++                               unsigned *cached_time)
++{
++    struct stat old_stat;
++    if (curtime - *cached_time < TIMEOUT_DEFAULT) {
++        memcpy(pdns_addr, cached_addr, addrlen);
++        return 0;
++    }
++    old_stat = *cached_stat;
++    if (stat(RESOLV_CONF_PATH, cached_stat) != 0) {
++        return -1;
++    }
++    if (cached_stat->st_dev == old_stat.st_dev &&
++        cached_stat->st_ino == old_stat.st_ino &&
++        cached_stat->st_size == old_stat.st_size &&
++        cached_stat->st_mtime == old_stat.st_mtime) {
++        memcpy(pdns_addr, cached_addr, addrlen);
++        return 0;
++    }
++    return 1; /* file changed on disk: the caller has to parse it again */
++}
++
++static int get_dns_addr_resolv_conf(int af, void *pdns_addr, void *cached_addr,
++                                    socklen_t addrlen, uint32_t *scope_id,
++                                    unsigned *cached_time)
++{
++    char buff[512];
++    char buff2[257];
++    FILE *f;
++    int found = 0;
++    union {
++        struct in_addr dns_addr;
++        struct in6_addr dns6_addr;
++    } tmp_addr;
++    unsigned if_index;
++
++    assert(sizeof(tmp_addr) >= addrlen);
++    f =
fopen(RESOLV_CONF_PATH, "r"); ++ if (!f) ++ return -1; ++ ++ DEBUG_MISC("IP address of your DNS(s):"); ++ while (fgets(buff, 512, f) != NULL) { ++ if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1) { ++ char *c = strchr(buff2, '%'); ++ if (c) { ++ if_index = if_nametoindex(c + 1); ++ *c = '\0'; ++ } else { ++ if_index = 0; ++ } ++ ++ if (!inet_pton(af, buff2, &tmp_addr)) { ++ continue; ++ } ++ /* If it's the first one, set it to dns_addr */ ++ if (!found) { ++ memcpy(pdns_addr, &tmp_addr, addrlen); ++ memcpy(cached_addr, &tmp_addr, addrlen); ++ if (scope_id) { ++ *scope_id = if_index; ++ } ++ *cached_time = curtime; ++ } ++ ++ if (++found > 3) { ++ DEBUG_MISC(" (more)"); ++ break; ++ } else if (slirp_debug & DBG_MISC) { ++ char s[INET6_ADDRSTRLEN]; ++ const char *res = inet_ntop(af, &tmp_addr, s, sizeof(s)); ++ if (!res) { ++ res = " (string conversion error)"; ++ } ++ DEBUG_MISC(" %s", res); ++ } ++ } ++ } ++ fclose(f); ++ if (!found) ++ return -1; ++ return 0; ++} ++ ++int get_dns_addr(struct in_addr *pdns_addr) ++{ ++ static struct stat dns_addr_stat; ++ ++ if (dns_addr.s_addr != 0) { ++ int ret; ++ ret = get_dns_addr_cached(pdns_addr, &dns_addr, sizeof(dns_addr), ++ &dns_addr_stat, &dns_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_resolv_conf(AF_INET, pdns_addr, &dns_addr, ++ sizeof(dns_addr), NULL, &dns_addr_time); ++} ++ ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) ++{ ++ static struct stat dns6_addr_stat; ++ ++ if (!in6_zero(&dns6_addr)) { ++ int ret; ++ ret = get_dns_addr_cached(pdns6_addr, &dns6_addr, sizeof(dns6_addr), ++ &dns6_addr_stat, &dns6_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_resolv_conf(AF_INET6, pdns6_addr, &dns6_addr, ++ sizeof(dns6_addr), scope_id, ++ &dns6_addr_time); ++} ++ ++#endif ++ ++static void slirp_init_once(void) ++{ ++ static int initialized; ++ const char *debug; ++#ifdef _WIN32 ++ WSADATA Data; ++#endif ++ ++ if (initialized) { ++ 
return; ++ } ++ initialized = 1; ++ ++#ifdef _WIN32 ++ WSAStartup(MAKEWORD(2, 0), &Data); ++ atexit(winsock_cleanup); ++#endif ++ ++ loopback_addr.s_addr = htonl(INADDR_LOOPBACK); ++ loopback_mask = htonl(IN_CLASSA_NET); ++ ++ debug = g_getenv("SLIRP_DEBUG"); ++ if (debug) { ++ const GDebugKey keys[] = { ++ { "call", DBG_CALL }, ++ { "misc", DBG_MISC }, ++ { "error", DBG_ERROR }, ++ { "tftp", DBG_TFTP }, ++ { "verbose_call", DBG_VERBOSE_CALL }, ++ }; ++ slirp_debug = g_parse_debug_string(debug, keys, G_N_ELEMENTS(keys)); ++ } ++} ++ ++Slirp *slirp_new(const SlirpConfig *cfg, const SlirpCb *callbacks, void *opaque) ++{ ++ Slirp *slirp; ++ ++ g_return_val_if_fail(cfg != NULL, NULL); ++ g_return_val_if_fail(cfg->version >= SLIRP_CONFIG_VERSION_MIN, NULL); ++ g_return_val_if_fail(cfg->version <= SLIRP_CONFIG_VERSION_MAX, NULL); ++ g_return_val_if_fail(cfg->if_mtu >= IF_MTU_MIN || cfg->if_mtu == 0, NULL); ++ g_return_val_if_fail(cfg->if_mtu <= IF_MTU_MAX, NULL); ++ g_return_val_if_fail(cfg->if_mru >= IF_MRU_MIN || cfg->if_mru == 0, NULL); ++ g_return_val_if_fail(cfg->if_mru <= IF_MRU_MAX, NULL); ++ g_return_val_if_fail(!cfg->bootfile || ++ (strlen(cfg->bootfile) < ++ G_SIZEOF_MEMBER(struct bootp_t, bp_file)), NULL); ++ ++ slirp = g_malloc0(sizeof(Slirp)); ++ ++ slirp_init_once(); ++ ++ slirp->opaque = opaque; ++ slirp->cb = callbacks; ++ slirp->grand = g_rand_new(); ++ slirp->restricted = cfg->restricted; ++ ++ slirp->in_enabled = cfg->in_enabled; ++ slirp->in6_enabled = cfg->in6_enabled; ++ ++ if_init(slirp); ++ ip_init(slirp); ++ ip6_init(slirp); ++ ++ m_init(slirp); ++ ++ slirp->vnetwork_addr = cfg->vnetwork; ++ slirp->vnetwork_mask = cfg->vnetmask; ++ slirp->vhost_addr = cfg->vhost; ++ slirp->vprefix_addr6 = cfg->vprefix_addr6; ++ slirp->vprefix_len = cfg->vprefix_len; ++ slirp->vhost_addr6 = cfg->vhost6; ++ if (cfg->vhostname) { ++ slirp_pstrcpy(slirp->client_hostname, sizeof(slirp->client_hostname), ++ cfg->vhostname); ++ } ++ slirp->tftp_prefix = 
g_strdup(cfg->tftp_path); ++ slirp->bootp_filename = g_strdup(cfg->bootfile); ++ slirp->vdomainname = g_strdup(cfg->vdomainname); ++ slirp->vdhcp_startaddr = cfg->vdhcp_start; ++ slirp->vnameserver_addr = cfg->vnameserver; ++ slirp->vnameserver_addr6 = cfg->vnameserver6; ++ slirp->tftp_server_name = g_strdup(cfg->tftp_server_name); ++ ++ if (cfg->vdnssearch) { ++ translate_dnssearch(slirp, cfg->vdnssearch); ++ } ++ slirp->if_mtu = cfg->if_mtu == 0 ? IF_MTU_DEFAULT : cfg->if_mtu; ++ slirp->if_mru = cfg->if_mru == 0 ? IF_MRU_DEFAULT : cfg->if_mru; ++ slirp->disable_host_loopback = cfg->disable_host_loopback; ++ slirp->enable_emu = cfg->enable_emu; ++ ++ if (cfg->version >= 2) { ++ slirp->outbound_addr = cfg->outbound_addr; ++ slirp->outbound_addr6 = cfg->outbound_addr6; ++ } else { ++ slirp->outbound_addr = NULL; ++ slirp->outbound_addr6 = NULL; ++ } ++ ++ if (cfg->version >= 3) { ++ slirp->disable_dns = cfg->disable_dns; ++ } else { ++ slirp->disable_dns = false; ++ } ++ ++ return slirp; ++} ++ ++Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, ++ struct in_addr vnetmask, struct in_addr vhost, ++ bool in6_enabled, struct in6_addr vprefix_addr6, ++ uint8_t vprefix_len, struct in6_addr vhost6, ++ const char *vhostname, const char *tftp_server_name, ++ const char *tftp_path, const char *bootfile, ++ struct in_addr vdhcp_start, struct in_addr vnameserver, ++ struct in6_addr vnameserver6, const char **vdnssearch, ++ const char *vdomainname, const SlirpCb *callbacks, ++ void *opaque) ++{ ++ SlirpConfig cfg; ++ memset(&cfg, 0, sizeof(cfg)); ++ cfg.version = 1; ++ cfg.restricted = restricted; ++ cfg.in_enabled = in_enabled; ++ cfg.vnetwork = vnetwork; ++ cfg.vnetmask = vnetmask; ++ cfg.vhost = vhost; ++ cfg.in6_enabled = in6_enabled; ++ cfg.vprefix_addr6 = vprefix_addr6; ++ cfg.vprefix_len = vprefix_len; ++ cfg.vhost6 = vhost6; ++ cfg.vhostname = vhostname; ++ cfg.tftp_server_name = tftp_server_name; ++ cfg.tftp_path = tftp_path; ++ cfg.bootfile = 
bootfile; ++ cfg.vdhcp_start = vdhcp_start; ++ cfg.vnameserver = vnameserver; ++ cfg.vnameserver6 = vnameserver6; ++ cfg.vdnssearch = vdnssearch; ++ cfg.vdomainname = vdomainname; ++ return slirp_new(&cfg, callbacks, opaque); ++} ++ ++void slirp_cleanup(Slirp *slirp) ++{ ++ struct gfwd_list *e, *next; ++ ++ for (e = slirp->guestfwd_list; e; e = next) { ++ next = e->ex_next; ++ g_free(e->ex_exec); ++ g_free(e->ex_unix); ++ g_free(e); ++ } ++ ++ ip_cleanup(slirp); ++ ip6_cleanup(slirp); ++ m_cleanup(slirp); ++ ++ g_rand_free(slirp->grand); ++ ++ g_free(slirp->vdnssearch); ++ g_free(slirp->tftp_prefix); ++ g_free(slirp->bootp_filename); ++ g_free(slirp->vdomainname); ++ g_free(slirp); ++} ++ ++#define CONN_CANFSEND(so) \ ++ (((so)->so_state & (SS_FCANTSENDMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) ++#define CONN_CANFRCV(so) \ ++ (((so)->so_state & (SS_FCANTRCVMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) ++ ++static void slirp_update_timeout(Slirp *slirp, uint32_t *timeout) ++{ ++ uint32_t t; ++ ++ if (*timeout <= TIMEOUT_FAST) { ++ return; ++ } ++ ++ t = MIN(1000, *timeout); ++ ++ /* If we have tcp timeout with slirp, then we will fill @timeout with ++ * more precise value. 
++ */ ++ if (slirp->time_fasttimo) { ++ *timeout = TIMEOUT_FAST; ++ return; ++ } ++ if (slirp->do_slowtimo) { ++ t = MIN(TIMEOUT_SLOW, t); ++ } ++ *timeout = t; ++} ++ ++void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, ++ SlirpAddPollCb add_poll, void *opaque) ++{ ++ struct socket *so, *so_next; ++ ++ /* ++ * First, TCP sockets ++ */ ++ ++ /* ++ * *_slowtimo needs calling if there are IP fragments ++ * in the fragment queue, or there are TCP connections active ++ */ ++ slirp->do_slowtimo = ((slirp->tcb.so_next != &slirp->tcb) || ++ (&slirp->ipq.ip_link != slirp->ipq.ip_link.next)); ++ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { ++ int events = 0; ++ ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if we need a tcp_fasttimo ++ */ ++ if (slirp->time_fasttimo == 0 && so->so_tcpcb->t_flags & TF_DELACK) { ++ slirp->time_fasttimo = curtime; /* Flag when want a fasttimo */ ++ } ++ ++ /* ++ * NOFDREF can include still connecting to local-host, ++ * newly socreated() sockets etc. Don't want to select these. ++ */ ++ if (so->so_state & SS_NOFDREF || so->s == -1) { ++ continue; ++ } ++ ++ /* ++ * Set for reading sockets which are accepting ++ */ ++ if (so->so_state & SS_FACCEPTCONN) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ continue; ++ } ++ ++ /* ++ * Set for writing sockets which are connecting ++ */ ++ if (so->so_state & SS_ISFCONNECTING) { ++ so->pollfds_idx = ++ add_poll(so->s, SLIRP_POLL_OUT | SLIRP_POLL_ERR, opaque); ++ continue; ++ } ++ ++ /* ++ * Set for writing if we are connected, can send more, and ++ * we have something to send ++ */ ++ if (CONN_CANFSEND(so) && so->so_rcv.sb_cc) { ++ events |= SLIRP_POLL_OUT | SLIRP_POLL_ERR; ++ } ++ ++ /* ++ * Set for reading (and urgent data) if we are connected, can ++ * receive more, and we have room for it XXX /2 ? 
++ */ ++ if (CONN_CANFRCV(so) && ++ (so->so_snd.sb_cc < (so->so_snd.sb_datalen / 2))) { ++ events |= SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR | ++ SLIRP_POLL_PRI; ++ } ++ ++ if (events) { ++ so->pollfds_idx = add_poll(so->s, events, opaque); ++ } ++ } ++ ++ /* ++ * UDP sockets ++ */ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if it's timed out ++ */ ++ if (so->so_expire) { ++ if (so->so_expire <= curtime) { ++ udp_detach(so); ++ continue; ++ } else { ++ slirp->do_slowtimo = true; /* Let socket expire */ ++ } ++ } ++ ++ /* ++ * When UDP packets are received from over the ++ * link, they're sendto()'d straight away, so ++ * no need for setting for writing ++ * Limit the number of packets queued by this session ++ * to 4. Note that even though we try and limit this ++ * to 4 packets, the session could have more queued ++ * if the packets needed to be fragmented ++ * (XXX <= 4 ?) ++ */ ++ if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ } ++ } ++ ++ /* ++ * ICMP sockets ++ */ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if it's timed out ++ */ ++ if (so->so_expire) { ++ if (so->so_expire <= curtime) { ++ icmp_detach(so); ++ continue; ++ } else { ++ slirp->do_slowtimo = true; /* Let socket expire */ ++ } ++ } ++ ++ if (so->so_state & SS_ISFCONNECTED) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ } ++ } ++ ++ slirp_update_timeout(slirp, timeout); ++} ++ ++void slirp_pollfds_poll(Slirp *slirp, int select_error, ++ SlirpGetREventsCb get_revents, void *opaque) ++{ ++ struct socket *so, *so_next; ++ int ret; ++ ++ curtime = slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS; ++ ++ /* ++ * See if anything has 
timed out ++ */ ++ if (slirp->time_fasttimo && ++ ((curtime - slirp->time_fasttimo) >= TIMEOUT_FAST)) { ++ tcp_fasttimo(slirp); ++ slirp->time_fasttimo = 0; ++ } ++ if (slirp->do_slowtimo && ++ ((curtime - slirp->last_slowtimo) >= TIMEOUT_SLOW)) { ++ ip_slowtimo(slirp); ++ tcp_slowtimo(slirp); ++ slirp->last_slowtimo = curtime; ++ } ++ ++ /* ++ * Check sockets ++ */ ++ if (!select_error) { ++ /* ++ * Check TCP sockets ++ */ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->so_state & SS_NOFDREF || so->s == -1) { ++ continue; ++ } ++ ++#ifndef __APPLE__ ++ /* ++ * Check for URG data ++ * This will soread as well, so no need to ++ * test for SLIRP_POLL_IN below if this succeeds. ++ * ++ * This is however disabled on MacOS, which apparently always ++ * reports data as PRI when it is the last data of the ++ * connection. We would then report it out of band, which the guest ++ * would most probably not be ready for. ++ */ ++ if (revents & SLIRP_POLL_PRI) { ++ ret = sorecvoob(so); ++ if (ret < 0) { ++ /* Socket error might have resulted in the socket being ++ * removed, do not try to do anything more with it. */ ++ continue; ++ } ++ } ++ /* ++ * Check sockets for reading ++ */ ++ else ++#endif ++ if (revents & ++ (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR | SLIRP_POLL_PRI)) { ++ /* ++ * Check for incoming connections ++ */ ++ if (so->so_state & SS_FACCEPTCONN) { ++ tcp_connect(so); ++ continue; ++ } /* else */ ++ ret = soread(so); ++ ++ /* Output it if we read something */ ++ if (ret > 0) { ++ tcp_output(sototcpcb(so)); ++ } ++ if (ret < 0) { ++ /* Socket error might have resulted in the socket being ++ * removed, do not try to do anything more with it. 
*/ ++ continue; ++ } ++ } ++ ++ /* ++ * Check sockets for writing ++ */ ++ if (!(so->so_state & SS_NOFDREF) && ++ (revents & (SLIRP_POLL_OUT | SLIRP_POLL_ERR))) { ++ /* ++ * Check for non-blocking, still-connecting sockets ++ */ ++ if (so->so_state & SS_ISFCONNECTING) { ++ /* Connected */ ++ so->so_state &= ~SS_ISFCONNECTING; ++ ++ ret = send(so->s, (const void *)&ret, 0, 0); ++ if (ret < 0) { ++ /* XXXXX Must fix, zero bytes is a NOP */ ++ if (errno == EAGAIN || errno == EWOULDBLOCK || ++ errno == EINPROGRESS || errno == ENOTCONN) { ++ continue; ++ } ++ ++ /* else failed */ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; ++ } ++ /* else so->so_state &= ~SS_ISFCONNECTING; */ ++ ++ /* ++ * Continue tcp_input ++ */ ++ tcp_input((struct mbuf *)NULL, sizeof(struct ip), so, ++ so->so_ffamily); ++ /* continue; */ ++ } else { ++ ret = sowrite(so); ++ if (ret > 0) { ++ /* Call tcp_output in case we need to send a window ++ * update to the guest, otherwise it will be stuck ++ * until it sends a window probe. */ ++ tcp_output(sototcpcb(so)); ++ } ++ } ++ } ++ } ++ ++ /* ++ * Now UDP sockets. ++ * Incoming packets are sent straight away, they're not buffered. ++ * Incoming UDP data isn't buffered either. ++ */ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->s != -1 && ++ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { ++ sorecvfrom(so); ++ } ++ } ++ ++ /* ++ * Check incoming ICMP relies. 
++ */ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->s != -1 && ++ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { ++ icmp_receive(so); ++ } ++ } ++ } ++ ++ if_start(slirp); ++} ++ ++static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ const struct slirp_arphdr *ah = ++ (const struct slirp_arphdr *)(pkt + ETH_HLEN); ++ uint8_t arp_reply[MAX(ETH_HLEN + sizeof(struct slirp_arphdr), 64)]; ++ struct ethhdr *reh = (struct ethhdr *)arp_reply; ++ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_reply + ETH_HLEN); ++ int ar_op; ++ struct gfwd_list *ex_ptr; ++ ++ if (!slirp->in_enabled) { ++ return; ++ } ++ ++ if (pkt_len < ETH_HLEN + sizeof(struct slirp_arphdr)) { ++ return; /* packet too short */ ++ } ++ ++ ar_op = ntohs(ah->ar_op); ++ switch (ar_op) { ++ case ARPOP_REQUEST: ++ if (ah->ar_tip == ah->ar_sip) { ++ /* Gratuitous ARP */ ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ return; ++ } ++ ++ if ((ah->ar_tip & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ if (ah->ar_tip == slirp->vnameserver_addr.s_addr || ++ ah->ar_tip == slirp->vhost_addr.s_addr) ++ goto arp_ok; ++ /* TODO: IPv6 */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_addr.s_addr == ah->ar_tip) ++ goto arp_ok; ++ } ++ return; ++ arp_ok: ++ memset(arp_reply, 0, sizeof(arp_reply)); ++ ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ ++ /* ARP request for alias/dns mac address */ ++ memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN); ++ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&reh->h_source[2], &ah->ar_tip, 4); ++ reh->h_proto = htons(ETH_P_ARP); ++ ++ rah->ar_hrd = htons(1); ++ rah->ar_pro = htons(ETH_P_IP); ++ rah->ar_hln = ETH_ALEN; ++ rah->ar_pln = 4; ++ rah->ar_op = 
htons(ARPOP_REPLY); ++ memcpy(rah->ar_sha, reh->h_source, ETH_ALEN); ++ rah->ar_sip = ah->ar_tip; ++ memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN); ++ rah->ar_tip = ah->ar_sip; ++ slirp_send_packet_all(slirp, arp_reply, sizeof(arp_reply)); ++ } ++ break; ++ case ARPOP_REPLY: ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ break; ++ default: ++ break; ++ } ++} ++ ++void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ struct mbuf *m; ++ int proto; ++ ++ if (pkt_len < ETH_HLEN) ++ return; ++ ++ proto = (((uint16_t)pkt[12]) << 8) + pkt[13]; ++ switch (proto) { ++ case ETH_P_ARP: ++ arp_input(slirp, pkt, pkt_len); ++ break; ++ case ETH_P_IP: ++ case ETH_P_IPV6: ++ m = m_get(slirp); ++ if (!m) ++ return; ++ /* Note: we add 2 to align the IP header on 4 bytes, ++ * and add the margin for the tcpiphdr overhead */ ++ if (M_FREEROOM(m) < pkt_len + TCPIPHDR_DELTA + 2) { ++ m_inc(m, pkt_len + TCPIPHDR_DELTA + 2); ++ } ++ m->m_len = pkt_len + TCPIPHDR_DELTA + 2; ++ memcpy(m->m_data + TCPIPHDR_DELTA + 2, pkt, pkt_len); ++ ++ m->m_data += TCPIPHDR_DELTA + 2 + ETH_HLEN; ++ m->m_len -= TCPIPHDR_DELTA + 2 + ETH_HLEN; ++ ++ if (proto == ETH_P_IP) { ++ ip_input(m); ++ } else if (proto == ETH_P_IPV6) { ++ ip6_input(m); ++ } ++ break; ++ ++ case ETH_P_NCSI: ++ ncsi_input(slirp, pkt, pkt_len); ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* Prepare the IPv4 packet to be sent to the ethernet device. Returns 1 if no ++ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet ++ * is ready to go. 
++ */ ++static int if_encap4(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ const struct ip *iph = (const struct ip *)ifm->m_data; ++ ++ if (!arp_table_search(slirp, iph->ip_dst.s_addr, ethaddr)) { ++ uint8_t arp_req[ETH_HLEN + sizeof(struct slirp_arphdr)]; ++ struct ethhdr *reh = (struct ethhdr *)arp_req; ++ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_req + ETH_HLEN); ++ ++ if (!ifm->resolution_requested) { ++ /* If the client addr is not known, send an ARP request */ ++ memset(reh->h_dest, 0xff, ETH_ALEN); ++ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&reh->h_source[2], &slirp->vhost_addr, 4); ++ reh->h_proto = htons(ETH_P_ARP); ++ rah->ar_hrd = htons(1); ++ rah->ar_pro = htons(ETH_P_IP); ++ rah->ar_hln = ETH_ALEN; ++ rah->ar_pln = 4; ++ rah->ar_op = htons(ARPOP_REQUEST); ++ ++ /* source hw addr */ ++ memcpy(rah->ar_sha, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&rah->ar_sha[2], &slirp->vhost_addr, 4); ++ ++ /* source IP */ ++ rah->ar_sip = slirp->vhost_addr.s_addr; ++ ++ /* target hw addr (none) */ ++ memset(rah->ar_tha, 0, ETH_ALEN); ++ ++ /* target IP */ ++ rah->ar_tip = iph->ip_dst.s_addr; ++ slirp->client_ipaddr = iph->ip_dst; ++ slirp_send_packet_all(slirp, arp_req, sizeof(arp_req)); ++ ifm->resolution_requested = true; ++ ++ /* Expire request and drop outgoing packet after 1 second */ ++ ifm->expiration_date = ++ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; ++ } ++ return 0; ++ } else { ++ memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 4); ++ /* XXX: not correct */ ++ memcpy(&eh->h_source[2], &slirp->vhost_addr, 4); ++ eh->h_proto = htons(ETH_P_IP); ++ ++ /* Send this */ ++ return 2; ++ } ++} ++ ++/* Prepare the IPv6 packet to be sent to the ethernet device. Returns 1 if no ++ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet ++ * is ready to go. 
++ */ ++static int if_encap6(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ const struct ip6 *ip6h = mtod(ifm, const struct ip6 *); ++ if (!ndp_table_search(slirp, ip6h->ip_dst, ethaddr)) { ++ if (!ifm->resolution_requested) { ++ ndp_send_ns(slirp, ip6h->ip_dst); ++ ifm->resolution_requested = true; ++ ifm->expiration_date = ++ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; ++ } ++ return 0; ++ } else { ++ eh->h_proto = htons(ETH_P_IPV6); ++ in6_compute_ethaddr(ip6h->ip_src, eh->h_source); ++ ++ /* Send this */ ++ return 2; ++ } ++} ++ ++/* Output the IP packet to the ethernet device. Returns 0 if the packet must be ++ * re-queued. ++ */ ++int if_encap(Slirp *slirp, struct mbuf *ifm) ++{ ++ uint8_t buf[IF_MTU_MAX + 100]; ++ struct ethhdr *eh = (struct ethhdr *)buf; ++ uint8_t ethaddr[ETH_ALEN]; ++ const struct ip *iph = (const struct ip *)ifm->m_data; ++ int ret; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ if (ifm->m_len + ETH_HLEN > sizeof(buf)) { ++ return 1; ++ } ++ ++ switch (iph->ip_v) { ++ case IPVERSION: ++ ret = if_encap4(slirp, ifm, eh, ethaddr); ++ if (ret < 2) { ++ return ret; ++ } ++ break; ++ ++ case IP6VERSION: ++ ret = if_encap6(slirp, ifm, eh, ethaddr); ++ if (ret < 2) { ++ return ret; ++ } ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ memcpy(eh->h_dest, ethaddr, ETH_ALEN); ++ DEBUG_ARG("src = %s", slirp_ether_ntoa(eh->h_source, ethaddr_str, ++ sizeof(ethaddr_str))); ++ DEBUG_ARG("dst = %s", slirp_ether_ntoa(eh->h_dest, ethaddr_str, ++ sizeof(ethaddr_str))); ++ memcpy(buf + sizeof(struct ethhdr), ifm->m_data, ifm->m_len); ++ slirp_send_packet_all(slirp, buf, ifm->m_len + ETH_HLEN); ++ return 1; ++} ++ ++/* Drop host forwarding rule, return 0 if found. */ ++int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port) ++{ ++ struct socket *so; ++ struct socket *head = (is_udp ? 
&slirp->udb : &slirp->tcb); ++ struct sockaddr_in addr; ++ int port = htons(host_port); ++ socklen_t addr_len; ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ addr_len = sizeof(addr); ++ if ((so->so_state & SS_HOSTFWD) && ++ getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && ++ addr_len == sizeof(addr) && ++ addr.sin_family == AF_INET && ++ addr.sin_addr.s_addr == host_addr.s_addr && ++ addr.sin_port == port) { ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++ return 0; ++ } ++ } ++ ++ return -1; ++} ++ ++int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port, struct in_addr guest_addr, int guest_port) ++{ ++ if (!guest_addr.s_addr) { ++ guest_addr = slirp->vdhcp_startaddr; ++ } ++ if (is_udp) { ++ if (!udp_listen(slirp, host_addr.s_addr, htons(host_port), ++ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) ++ return -1; ++ } else { ++ if (!tcp_listen(slirp, host_addr.s_addr, htons(host_port), ++ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) ++ return -1; ++ } ++ return 0; ++} ++ ++int slirp_remove_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ int flags) ++{ ++ struct socket *so; ++ struct socket *head = (flags & SLIRP_HOSTFWD_UDP ? 
&slirp->udb : &slirp->tcb); ++ struct sockaddr_storage addr; ++ socklen_t addr_len; ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ addr_len = sizeof(addr); ++ if ((so->so_state & SS_HOSTFWD) && ++ getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && ++ sockaddr_equal(&addr, (const struct sockaddr_storage *) haddr)) { ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++ return 0; ++ } ++ } ++ ++ return -1; ++} ++ ++int slirp_add_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *gaddr, socklen_t gaddrlen, ++ int flags) ++{ ++ struct sockaddr_in gdhcp_addr; ++ int fwd_flags = SS_HOSTFWD; ++ ++ if (flags & SLIRP_HOSTFWD_V6ONLY) ++ fwd_flags |= SS_HOSTFWD_V6ONLY; ++ ++ if (gaddr->sa_family == AF_INET) { ++ const struct sockaddr_in *gaddr_in = (const struct sockaddr_in *) gaddr; ++ ++ if (gaddrlen < sizeof(struct sockaddr_in)) { ++ errno = EINVAL; ++ return -1; ++ } ++ ++ if (!gaddr_in->sin_addr.s_addr) { ++ gdhcp_addr = *gaddr_in; ++ gdhcp_addr.sin_addr = slirp->vdhcp_startaddr; ++ gaddr = (struct sockaddr *) &gdhcp_addr; ++ gaddrlen = sizeof(gdhcp_addr); ++ } ++ } else { ++ if (gaddrlen < sizeof(struct sockaddr_in6)) { ++ errno = EINVAL; ++ return -1; ++ } ++ ++ /* ++ * Libslirp currently only provides a stateless DHCPv6 server, thus ++ * we can't translate "addr-any" to the guest here. Instead, we defer ++ * performing the translation to when it's needed. See ++ * soassign_guest_addr_if_needed(). 
++ */ ++ } ++ ++ if (flags & SLIRP_HOSTFWD_UDP) { ++ if (!udpx_listen(slirp, haddr, haddrlen, ++ gaddr, gaddrlen, ++ fwd_flags)) ++ return -1; ++ } else { ++ if (!tcpx_listen(slirp, haddr, haddrlen, ++ gaddr, gaddrlen, ++ fwd_flags)) ++ return -1; ++ } ++ return 0; ++} ++ ++/* TODO: IPv6 */ ++static bool check_guestfwd(Slirp *slirp, struct in_addr *guest_addr, ++ int guest_port) ++{ ++ struct gfwd_list *tmp_ptr; ++ ++ if (!guest_addr->s_addr) { ++ guest_addr->s_addr = slirp->vnetwork_addr.s_addr | ++ (htonl(0x0204) & ~slirp->vnetwork_mask.s_addr); ++ } ++ if ((guest_addr->s_addr & slirp->vnetwork_mask.s_addr) != ++ slirp->vnetwork_addr.s_addr || ++ guest_addr->s_addr == slirp->vhost_addr.s_addr || ++ guest_addr->s_addr == slirp->vnameserver_addr.s_addr) { ++ return false; ++ } ++ ++ /* check if the port is "bound" */ ++ for (tmp_ptr = slirp->guestfwd_list; tmp_ptr; tmp_ptr = tmp_ptr->ex_next) { ++ if (guest_port == tmp_ptr->ex_fport && ++ guest_addr->s_addr == tmp_ptr->ex_addr.s_addr) ++ return false; ++ } ++ ++ return true; ++} ++ ++int slirp_add_exec(Slirp *slirp, const char *cmdline, ++ struct in_addr *guest_addr, int guest_port) ++{ ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_exec(&slirp->guestfwd_list, cmdline, *guest_addr, htons(guest_port)); ++ return 0; ++} ++ ++int slirp_add_unix(Slirp *slirp, const char *unixsock, ++ struct in_addr *guest_addr, int guest_port) ++{ ++#ifdef G_OS_UNIX ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_unix(&slirp->guestfwd_list, unixsock, *guest_addr, htons(guest_port)); ++ return 0; ++#else ++ g_warn_if_reached(); ++ return -1; ++#endif ++} ++ ++int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, ++ struct in_addr *guest_addr, int guest_port) ++{ ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_guestfwd(&slirp->guestfwd_list, write_cb, opaque, *guest_addr, ++ htons(guest_port)); ++ return 
0; ++} ++ ++int slirp_remove_guestfwd(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ return remove_guestfwd(&slirp->guestfwd_list, guest_addr, ++ htons(guest_port)); ++} ++ ++ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags) ++{ ++ if (so->s == -1 && so->guestfwd) { ++ /* XXX this blocks entire thread. Rewrite to use ++ * qemu_chr_fe_write and background I/O callbacks */ ++ so->guestfwd->write_cb(buf, len, so->guestfwd->opaque); ++ return len; ++ } ++ ++ if (so->s == -1) { ++ /* ++ * This should in theory not happen but it is hard to be ++ * sure because some code paths will end up with so->s == -1 ++ * on a failure but don't dispose of the struct socket. ++ * Check specifically, so we don't pass -1 to send(). ++ */ ++ errno = EBADF; ++ return -1; ++ } ++ ++ return send(so->s, buf, len, flags); ++} ++ ++struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ struct socket *so; ++ ++ /* TODO: IPv6 */ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { ++ if (so->so_faddr.s_addr == guest_addr.s_addr && ++ htons(so->so_fport) == guest_port) { ++ return so; ++ } ++ } ++ return NULL; ++} ++ ++size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ struct iovec iov[2]; ++ struct socket *so; ++ ++ so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); ++ ++ if (!so || so->so_state & SS_NOFDREF) { ++ return 0; ++ } ++ ++ if (!CONN_CANFRCV(so) || so->so_snd.sb_cc >= (so->so_snd.sb_datalen / 2)) { ++ return 0; ++ } ++ ++ return sopreprbuf(so, iov, NULL); ++} ++ ++void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, ++ const uint8_t *buf, int size) ++{ ++ int ret; ++ struct socket *so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); ++ ++ if (!so) ++ return; ++ ++ ret = soreadbuf(so, (const char *)buf, size); ++ ++ if (ret > 0) ++ tcp_output(sototcpcb(so)); ++} ++ ++void 
slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len) ++{ ++ ssize_t ret = slirp->cb->send_packet(buf, len, slirp->opaque); ++ ++ if (ret < 0) { ++ g_critical("Failed to send packet, ret: %ld", (long)ret); ++ } else if (ret < len) { ++ DEBUG_ERROR("send_packet() didn't send all data: %ld < %lu", (long)ret, ++ (unsigned long)len); ++ } ++} +diff --git a/slirp/src/slirp.h b/slirp/src/slirp.h +new file mode 100644 +index 0000000000..89d79f3de5 +--- /dev/null ++++ b/slirp/src/slirp.h +@@ -0,0 +1,289 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef SLIRP_H ++#define SLIRP_H ++ ++#ifdef _WIN32 ++ ++/* as defined in sdkddkver.h */ ++#ifndef _WIN32_WINNT ++#define _WIN32_WINNT 0x0600 /* Vista */ ++#endif ++/* reduces the number of implicitly included headers */ ++#ifndef WIN32_LEAN_AND_MEAN ++#define WIN32_LEAN_AND_MEAN ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++ ++#else ++#define O_BINARY 0 ++#endif ++ ++#ifndef _WIN32 ++#include ++#include ++#include ++#include ++#include ++#endif ++ ++#ifdef __APPLE__ ++#include ++#endif ++ ++/* Avoid conflicting with the libc insque() and remque(), which ++ have different prototypes. 
*/ ++#define insque slirp_insque ++#define remque slirp_remque ++#define quehead slirp_quehead ++ ++#include "debug.h" ++#include "util.h" ++ ++#include "libslirp.h" ++#include "ip.h" ++#include "ip6.h" ++#include "tcp.h" ++#include "tcp_timer.h" ++#include "tcp_var.h" ++#include "tcpip.h" ++#include "udp.h" ++#include "ip_icmp.h" ++#include "ip6_icmp.h" ++#include "mbuf.h" ++#include "sbuf.h" ++#include "socket.h" ++#include "if.h" ++#include "main.h" ++#include "misc.h" ++ ++#include "bootp.h" ++#include "tftp.h" ++ ++#define ARPOP_REQUEST 1 /* ARP request */ ++#define ARPOP_REPLY 2 /* ARP reply */ ++ ++struct ethhdr { ++ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ ++ unsigned char h_source[ETH_ALEN]; /* source ether addr */ ++ unsigned short h_proto; /* packet type ID field */ ++}; ++ ++struct slirp_arphdr { ++ unsigned short ar_hrd; /* format of hardware address */ ++ unsigned short ar_pro; /* format of protocol address */ ++ unsigned char ar_hln; /* length of hardware address */ ++ unsigned char ar_pln; /* length of protocol address */ ++ unsigned short ar_op; /* ARP opcode (command) */ ++ ++ /* ++ * Ethernet looks like this : This bit is variable sized however... 
++ */ ++ uint8_t ar_sha[ETH_ALEN]; /* sender hardware address */ ++ uint32_t ar_sip; /* sender IP address */ ++ uint8_t ar_tha[ETH_ALEN]; /* target hardware address */ ++ uint32_t ar_tip; /* target IP address */ ++} SLIRP_PACKED; ++ ++#define ARP_TABLE_SIZE 16 ++ ++typedef struct ArpTable { ++ struct slirp_arphdr table[ARP_TABLE_SIZE]; ++ int next_victim; ++} ArpTable; ++ ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, ++ const uint8_t ethaddr[ETH_ALEN]); ++ ++bool arp_table_search(Slirp *slirp, uint32_t ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]); ++ ++struct ndpentry { ++ uint8_t eth_addr[ETH_ALEN]; /* sender hardware address */ ++ struct in6_addr ip_addr; /* sender IP address */ ++}; ++ ++#define NDP_TABLE_SIZE 16 ++ ++typedef struct NdpTable { ++ struct ndpentry table[NDP_TABLE_SIZE]; ++ /* ++ * The table is a cache with old entries overwritten when the table fills. ++ * Preserve the first entry: it is the guest, which is needed for lazy ++ * hostfwd guest address assignment. ++ */ ++ struct in6_addr guest_in6_addr; ++ int next_victim; ++} NdpTable; ++ ++void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t ethaddr[ETH_ALEN]); ++bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]); ++ ++struct Slirp { ++ unsigned time_fasttimo; ++ unsigned last_slowtimo; ++ bool do_slowtimo; ++ ++ bool in_enabled, in6_enabled; ++ ++ /* virtual network configuration */ ++ struct in_addr vnetwork_addr; ++ struct in_addr vnetwork_mask; ++ struct in_addr vhost_addr; ++ struct in6_addr vprefix_addr6; ++ uint8_t vprefix_len; ++ struct in6_addr vhost_addr6; ++ struct in_addr vdhcp_startaddr; ++ struct in_addr vnameserver_addr; ++ struct in6_addr vnameserver_addr6; ++ ++ struct in_addr client_ipaddr; ++ char client_hostname[33]; ++ ++ int restricted; ++ struct gfwd_list *guestfwd_list; ++ ++ int if_mtu; ++ int if_mru; ++ ++ bool disable_host_loopback; ++ ++ /* mbuf states */ ++ struct quehead m_freelist; ++ struct quehead 
m_usedlist; ++ int mbuf_alloced; ++ ++ /* if states */ ++ struct quehead if_fastq; /* fast queue (for interactive data) */ ++ struct quehead if_batchq; /* queue for non-interactive data */ ++ bool if_start_busy; /* avoid if_start recursion */ ++ ++ /* ip states */ ++ struct ipq ipq; /* ip reass. queue */ ++ uint16_t ip_id; /* ip packet ctr, for ids */ ++ ++ /* bootp/dhcp states */ ++ BOOTPClient bootp_clients[NB_BOOTP_CLIENTS]; ++ char *bootp_filename; ++ size_t vdnssearch_len; ++ uint8_t *vdnssearch; ++ char *vdomainname; ++ ++ /* tcp states */ ++ struct socket tcb; ++ struct socket *tcp_last_so; ++ tcp_seq tcp_iss; /* tcp initial send seq # */ ++ uint32_t tcp_now; /* for RFC 1323 timestamps */ ++ ++ /* udp states */ ++ struct socket udb; ++ struct socket *udp_last_so; ++ ++ /* icmp states */ ++ struct socket icmp; ++ struct socket *icmp_last_so; ++ ++ /* tftp states */ ++ char *tftp_prefix; ++ struct tftp_session tftp_sessions[TFTP_SESSIONS_MAX]; ++ char *tftp_server_name; ++ ++ ArpTable arp_table; ++ NdpTable ndp_table; ++ ++ GRand *grand; ++ void *ra_timer; ++ ++ bool enable_emu; ++ ++ const SlirpCb *cb; ++ void *opaque; ++ ++ struct sockaddr_in *outbound_addr; ++ struct sockaddr_in6 *outbound_addr6; ++ bool disable_dns; /* slirp will not redirect/serve any DNS packet */ ++}; ++ ++void if_start(Slirp *); ++ ++int get_dns_addr(struct in_addr *pdns_addr); ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id); ++ ++/* ncsi.c */ ++void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); ++ ++#ifndef _WIN32 ++#include ++#endif ++ ++ ++extern bool slirp_do_keepalive; ++ ++#define TCP_MAXIDLE (TCPTV_KEEPCNT * TCPTV_KEEPINTVL) ++ ++/* dnssearch.c */ ++int translate_dnssearch(Slirp *s, const char **names); ++ ++/* cksum.c */ ++int cksum(struct mbuf *m, int len); ++int ip6_cksum(struct mbuf *m); ++ ++/* if.c */ ++void if_init(Slirp *); ++void if_output(struct socket *, struct mbuf *); ++ ++/* ip_input.c */ ++void ip_init(Slirp *); ++void 
ip_cleanup(Slirp *); ++void ip_input(struct mbuf *); ++void ip_slowtimo(Slirp *); ++void ip_stripoptions(register struct mbuf *, struct mbuf *); ++ ++/* ip_output.c */ ++int ip_output(struct socket *, struct mbuf *); ++ ++/* ip6_input.c */ ++void ip6_init(Slirp *); ++void ip6_cleanup(Slirp *); ++void ip6_input(struct mbuf *); ++ ++/* ip6_output */ ++int ip6_output(struct socket *, struct mbuf *, int fast); ++ ++/* tcp_input.c */ ++void tcp_input(register struct mbuf *, int, struct socket *, unsigned short af); ++int tcp_mss(register struct tcpcb *, unsigned); ++ ++/* tcp_output.c */ ++int tcp_output(register struct tcpcb *); ++void tcp_setpersist(register struct tcpcb *); ++ ++/* tcp_subr.c */ ++void tcp_init(Slirp *); ++void tcp_cleanup(Slirp *); ++void tcp_template(struct tcpcb *); ++void tcp_respond(struct tcpcb *, register struct tcpiphdr *, ++ register struct mbuf *, tcp_seq, tcp_seq, int, unsigned short); ++struct tcpcb *tcp_newtcpcb(struct socket *); ++struct tcpcb *tcp_close(register struct tcpcb *); ++void tcp_sockclosed(struct tcpcb *); ++int tcp_fconnect(struct socket *, unsigned short af); ++void tcp_connect(struct socket *); ++void tcp_attach(struct socket *); ++uint8_t tcp_tos(struct socket *); ++int tcp_emu(struct socket *, struct mbuf *); ++int tcp_ctl(struct socket *); ++struct tcpcb *tcp_drop(struct tcpcb *tp, int err); ++ ++struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++ ++void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len); ++ ++#endif +diff --git a/slirp/src/socket.c b/slirp/src/socket.c +new file mode 100644 +index 0000000000..2c1b789d48 +--- /dev/null ++++ b/slirp/src/socket.c +@@ -0,0 +1,1104 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++#ifdef __sun__ ++#include ++#endif ++#ifdef __linux__ ++#include ++#endif ++ ++static void sofcantrcvmore(struct socket *so); ++static void sofcantsendmore(struct socket *so); ++ ++struct socket *solookup(struct socket **last, struct socket *head, ++ struct sockaddr_storage *lhost, ++ struct sockaddr_storage *fhost) ++{ ++ struct socket *so = *last; ++ ++ /* Optimisation */ ++ if (so != head && sockaddr_equal(&(so->lhost.ss), lhost) && ++ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { ++ return so; ++ } ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ if (sockaddr_equal(&(so->lhost.ss), lhost) && ++ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { ++ *last = so; ++ return so; ++ } ++ } ++ ++ return (struct socket *)NULL; ++} ++ ++/* ++ * Create a new socket, initialise the fields ++ * It is the responsibility of the caller to ++ * insque() it into the correct linked-list ++ */ ++struct socket *socreate(Slirp *slirp) ++{ ++ struct socket *so = g_new(struct socket, 1); ++ ++ memset(so, 0, sizeof(struct socket)); ++ so->so_state = SS_NOFDREF; ++ so->s = -1; ++ so->slirp = slirp; ++ so->pollfds_idx = -1; ++ ++ return so; ++} ++ ++/* ++ * Remove references to so from the given message queue. 
++ */ ++static void soqfree(struct socket *so, struct quehead *qh) ++{ ++ struct mbuf *ifq; ++ ++ for (ifq = (struct mbuf *)qh->qh_link; (struct quehead *)ifq != qh; ++ ifq = ifq->ifq_next) { ++ if (ifq->ifq_so == so) { ++ struct mbuf *ifm; ++ ifq->ifq_so = NULL; ++ for (ifm = ifq->ifs_next; ifm != ifq; ifm = ifm->ifs_next) { ++ ifm->ifq_so = NULL; ++ } ++ } ++ } ++} ++ ++/* ++ * remque and free a socket, clobber cache ++ */ ++void sofree(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ ++ soqfree(so, &slirp->if_fastq); ++ soqfree(so, &slirp->if_batchq); ++ ++ if (so == slirp->tcp_last_so) { ++ slirp->tcp_last_so = &slirp->tcb; ++ } else if (so == slirp->udp_last_so) { ++ slirp->udp_last_so = &slirp->udb; ++ } else if (so == slirp->icmp_last_so) { ++ slirp->icmp_last_so = &slirp->icmp; ++ } ++ m_free(so->so_m); ++ ++ if (so->so_next && so->so_prev) ++ remque(so); /* crashes if so is not in a queue */ ++ ++ if (so->so_tcpcb) { ++ g_free(so->so_tcpcb); ++ } ++ g_free(so); ++} ++ ++size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np) ++{ ++ int n, lss, total; ++ struct sbuf *sb = &so->so_snd; ++ int len = sb->sb_datalen - sb->sb_cc; ++ int mss = so->so_tcpcb->t_maxseg; ++ ++ DEBUG_CALL("sopreprbuf"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (len <= 0) ++ return 0; ++ ++ iov[0].iov_base = sb->sb_wptr; ++ iov[1].iov_base = NULL; ++ iov[1].iov_len = 0; ++ if (sb->sb_wptr < sb->sb_rptr) { ++ iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; ++ /* Should never succeed, but... */ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ if (iov[0].iov_len > mss) ++ iov[0].iov_len -= iov[0].iov_len % mss; ++ n = 1; ++ } else { ++ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; ++ /* Should never succeed, but... 
*/ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ len -= iov[0].iov_len; ++ if (len) { ++ iov[1].iov_base = sb->sb_data; ++ iov[1].iov_len = sb->sb_rptr - sb->sb_data; ++ if (iov[1].iov_len > len) ++ iov[1].iov_len = len; ++ total = iov[0].iov_len + iov[1].iov_len; ++ if (total > mss) { ++ lss = total % mss; ++ if (iov[1].iov_len > lss) { ++ iov[1].iov_len -= lss; ++ n = 2; ++ } else { ++ lss -= iov[1].iov_len; ++ iov[0].iov_len -= lss; ++ n = 1; ++ } ++ } else ++ n = 2; ++ } else { ++ if (iov[0].iov_len > mss) ++ iov[0].iov_len -= iov[0].iov_len % mss; ++ n = 1; ++ } ++ } ++ if (np) ++ *np = n; ++ ++ return iov[0].iov_len + (n - 1) * iov[1].iov_len; ++} ++ ++/* ++ * Read from so's socket into sb_snd, updating all relevant sbuf fields ++ * NOTE: This will only be called if it is select()ed for reading, so ++ * a read() of 0 (or less) means it's disconnected ++ */ ++int soread(struct socket *so) ++{ ++ int n, nn; ++ size_t buf_len; ++ struct sbuf *sb = &so->so_snd; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("soread"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * No need to check if there's enough room to read. 
++ * soread wouldn't have been called if there weren't ++ */ ++ buf_len = sopreprbuf(so, iov, &n); ++ assert(buf_len != 0); ++ ++ nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, 0); ++ if (nn <= 0) { ++ if (nn < 0 && (errno == EINTR || errno == EAGAIN)) ++ return 0; ++ else { ++ int err; ++ socklen_t elen = sizeof err; ++ struct sockaddr_storage addr; ++ struct sockaddr *paddr = (struct sockaddr *)&addr; ++ socklen_t alen = sizeof addr; ++ ++ err = errno; ++ if (nn == 0) { ++ int shutdown_wr = so->so_state & SS_FCANTSENDMORE; ++ ++ if (!shutdown_wr && getpeername(so->s, paddr, &alen) < 0) { ++ err = errno; ++ } else { ++ getsockopt(so->s, SOL_SOCKET, SO_ERROR, &err, &elen); ++ } ++ } ++ ++ DEBUG_MISC(" --- soread() disconnected, nn = %d, errno = %d-%s", nn, ++ errno, strerror(errno)); ++ sofcantrcvmore(so); ++ ++ if (err == ECONNRESET || err == ECONNREFUSED || err == ENOTCONN || ++ err == EPIPE) { ++ tcp_drop(sototcpcb(so), err); ++ } else { ++ tcp_sockclosed(sototcpcb(so)); ++ } ++ return -1; ++ } ++ } ++ ++ /* ++ * If there was no error, try and read the second time round ++ * We read again if n = 2 (ie, there's another part of the buffer) ++ * and we read as much as we could in the first read ++ * We don't test for <= 0 this time, because there legitimately ++ * might not be any more data (since the socket is non-blocking), ++ * a close will be detected on next iteration. ++ * A return of -1 won't (shouldn't) happen, since it didn't happen above ++ */ ++ if (n == 2 && nn == iov[0].iov_len) { ++ int ret; ++ ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0); ++ if (ret > 0) ++ nn += ret; ++ } ++ ++ DEBUG_MISC(" ... 
read nn = %d bytes", nn); ++ ++ /* Update fields */ ++ sb->sb_cc += nn; ++ sb->sb_wptr += nn; ++ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_wptr -= sb->sb_datalen; ++ return nn; ++} ++ ++int soreadbuf(struct socket *so, const char *buf, int size) ++{ ++ int n, nn, copy = size; ++ struct sbuf *sb = &so->so_snd; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("soreadbuf"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * No need to check if there's enough room to read. ++ * soread wouldn't have been called if there weren't ++ */ ++ assert(size > 0); ++ if (sopreprbuf(so, iov, &n) < size) ++ goto err; ++ ++ nn = MIN(iov[0].iov_len, copy); ++ memcpy(iov[0].iov_base, buf, nn); ++ ++ copy -= nn; ++ buf += nn; ++ ++ if (copy == 0) ++ goto done; ++ ++ memcpy(iov[1].iov_base, buf, copy); ++ ++done: ++ /* Update fields */ ++ sb->sb_cc += size; ++ sb->sb_wptr += size; ++ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_wptr -= sb->sb_datalen; ++ return size; ++err: ++ ++ sofcantrcvmore(so); ++ tcp_sockclosed(sototcpcb(so)); ++ g_critical("soreadbuf buffer too small"); ++ return -1; ++} ++ ++/* ++ * Get urgent data ++ * ++ * When the socket is created, we set it SO_OOBINLINE, ++ * so when OOB data arrives, we soread() it and everything ++ * in the send buffer is sent as urgent data ++ */ ++int sorecvoob(struct socket *so) ++{ ++ struct tcpcb *tp = sototcpcb(so); ++ int ret; ++ ++ DEBUG_CALL("sorecvoob"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * We take a guess at how much urgent data has arrived. ++ * In most situations, when urgent data arrives, the next ++ * read() should get all the urgent data. This guess will ++ * be wrong however if more data arrives just after the ++ * urgent data, or the read() doesn't return all the ++ * urgent data. 
++ */ ++ ret = soread(so); ++ if (ret > 0) { ++ tp->snd_up = tp->snd_una + so->so_snd.sb_cc; ++ tp->t_force = 1; ++ tcp_output(tp); ++ tp->t_force = 0; ++ } ++ ++ return ret; ++} ++ ++/* ++ * Send urgent data ++ * There's a lot duplicated code here, but... ++ */ ++int sosendoob(struct socket *so) ++{ ++ struct sbuf *sb = &so->so_rcv; ++ char buff[2048]; /* XXX Shouldn't be sending more oob data than this */ ++ ++ int n; ++ ++ DEBUG_CALL("sosendoob"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc); ++ ++ if (so->so_urgc > sizeof(buff)) ++ so->so_urgc = sizeof(buff); /* XXXX */ ++ ++ if (sb->sb_rptr < sb->sb_wptr) { ++ /* We can send it directly */ ++ n = slirp_send(so, sb->sb_rptr, so->so_urgc, ++ (MSG_OOB)); /* |MSG_DONTWAIT)); */ ++ } else { ++ /* ++ * Since there's no sendv or sendtov like writev, ++ * we must copy all data to a linear buffer then ++ * send it all ++ */ ++ uint32_t urgc = so->so_urgc; /* Amount of room left in buff */ ++ int len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; ++ if (len > urgc) { ++ len = urgc; ++ } ++ memcpy(buff, sb->sb_rptr, len); ++ urgc -= len; ++ if (urgc) { ++ /* We still have some room for the rest */ ++ n = sb->sb_wptr - sb->sb_data; ++ if (n > urgc) { ++ n = urgc; ++ } ++ memcpy((buff + len), sb->sb_data, n); ++ len += n; ++ } ++ n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */ ++#ifdef DEBUG ++ if (n != len) { ++ DEBUG_ERROR("Didn't send all data urgently XXXXX"); ++ } ++#endif ++ } ++ ++ if (n < 0) { ++ return n; ++ } ++ so->so_urgc -= n; ++ DEBUG_MISC(" ---2 sent %d bytes urgent data, %d urgent bytes left", n, ++ so->so_urgc); ++ ++ sb->sb_cc -= n; ++ sb->sb_rptr += n; ++ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ return n; ++} ++ ++/* ++ * Write data from so_rcv to so's socket, ++ * updating all sbuf field as necessary ++ */ ++int sowrite(struct socket *so) ++{ ++ int n, nn; ++ struct sbuf *sb = &so->so_rcv; ++ int len = 
sb->sb_cc; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("sowrite"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (so->so_urgc) { ++ uint32_t expected = so->so_urgc; ++ if (sosendoob(so) < expected) { ++ /* Treat a short write as a fatal error too, ++ * rather than continuing on and sending the urgent ++ * data as if it were non-urgent and leaving the ++ * so_urgc count wrong. ++ */ ++ goto err_disconnected; ++ } ++ if (sb->sb_cc == 0) ++ return 0; ++ } ++ ++ /* ++ * No need to check if there's something to write, ++ * sowrite wouldn't have been called otherwise ++ */ ++ ++ iov[0].iov_base = sb->sb_rptr; ++ iov[1].iov_base = NULL; ++ iov[1].iov_len = 0; ++ if (sb->sb_rptr < sb->sb_wptr) { ++ iov[0].iov_len = sb->sb_wptr - sb->sb_rptr; ++ /* Should never succeed, but... */ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ n = 1; ++ } else { ++ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ len -= iov[0].iov_len; ++ if (len) { ++ iov[1].iov_base = sb->sb_data; ++ iov[1].iov_len = sb->sb_wptr - sb->sb_data; ++ if (iov[1].iov_len > len) ++ iov[1].iov_len = len; ++ n = 2; ++ } else ++ n = 1; ++ } ++ /* Check if there's urgent data to send, and if so, send it */ ++ ++ nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len, 0); ++ /* This should never happen, but people tell me it does *shrug* */ ++ if (nn < 0 && (errno == EAGAIN || errno == EINTR)) ++ return 0; ++ ++ if (nn <= 0) { ++ goto err_disconnected; ++ } ++ ++ if (n == 2 && nn == iov[0].iov_len) { ++ int ret; ++ ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len, 0); ++ if (ret > 0) ++ nn += ret; ++ } ++ DEBUG_MISC(" ... 
wrote nn = %d bytes", nn); ++ ++ /* Update sbuf */ ++ sb->sb_cc -= nn; ++ sb->sb_rptr += nn; ++ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ /* ++ * If in DRAIN mode, and there's no more data, set ++ * it CANTSENDMORE ++ */ ++ if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0) ++ sofcantsendmore(so); ++ ++ return nn; ++ ++err_disconnected: ++ DEBUG_MISC(" --- sowrite disconnected, so->so_state = %x, errno = %d", ++ so->so_state, errno); ++ sofcantsendmore(so); ++ tcp_sockclosed(sototcpcb(so)); ++ return -1; ++} ++ ++/* ++ * recvfrom() a UDP socket ++ */ ++void sorecvfrom(struct socket *so) ++{ ++ struct sockaddr_storage addr; ++ struct sockaddr_storage saddr, daddr; ++ socklen_t addrlen = sizeof(struct sockaddr_storage); ++ char buff[256]; ++ ++#ifdef __linux__ ++ ssize_t size; ++ struct msghdr msg; ++ struct iovec iov; ++ char control[1024]; ++ ++ /* First look for errors */ ++ memset(&msg, 0, sizeof(msg)); ++ msg.msg_name = &saddr; ++ msg.msg_namelen = sizeof(saddr); ++ msg.msg_control = control; ++ msg.msg_controllen = sizeof(control); ++ iov.iov_base = buff; ++ iov.iov_len = sizeof(buff); ++ msg.msg_iov = &iov; ++ msg.msg_iovlen = 1; ++ ++ size = recvmsg(so->s, &msg, MSG_ERRQUEUE); ++ if (size >= 0) { ++ struct cmsghdr *cmsg; ++ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { ++ ++ if (cmsg->cmsg_level == IPPROTO_IP && ++ cmsg->cmsg_type == IP_RECVERR) { ++ struct sock_extended_err *ee = ++ (struct sock_extended_err *) CMSG_DATA(cmsg); ++ ++ if (ee->ee_origin == SO_EE_ORIGIN_ICMP) { ++ /* Got an ICMP error, forward it */ ++ struct sockaddr_in *sin; ++ ++ sin = (struct sockaddr_in *) SO_EE_OFFENDER(ee); ++ icmp_forward_error(so->so_m, ee->ee_type, ee->ee_code, ++ 0, NULL, &sin->sin_addr); ++ } ++ } ++ else if (cmsg->cmsg_level == IPPROTO_IPV6 && ++ cmsg->cmsg_type == IPV6_RECVERR) { ++ struct sock_extended_err *ee = ++ (struct sock_extended_err *) CMSG_DATA(cmsg); ++ ++ if (ee->ee_origin == 
SO_EE_ORIGIN_ICMP6) { ++ /* Got an ICMPv6 error, forward it */ ++ struct sockaddr_in6 *sin6; ++ ++ sin6 = (struct sockaddr_in6 *) SO_EE_OFFENDER(ee); ++ icmp6_forward_error(so->so_m, ee->ee_type, ee->ee_code, ++ &sin6->sin6_addr); ++ } ++ } ++ } ++ return; ++ } ++#endif ++ ++ DEBUG_CALL("sorecvfrom"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (so->so_type == IPPROTO_ICMP) { /* This is a "ping" reply */ ++ int len; ++ ++ len = recvfrom(so->s, buff, 256, 0, (struct sockaddr *)&addr, &addrlen); ++ /* XXX Check if reply is "correct"? */ ++ ++ if (len == -1 || len == 0) { ++ uint8_t code = ICMP_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) ++ code = ICMP_UNREACH_HOST; ++ else if (errno == ENETUNREACH) ++ code = ICMP_UNREACH_NET; ++ ++ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno)); ++ } else { ++ icmp_reflect(so->so_m); ++ so->so_m = NULL; /* Don't m_free() it again! */ ++ } ++ /* No need for this socket anymore, udp_detach it */ ++ udp_detach(so); ++ } else { /* A "normal" UDP packet */ ++ struct mbuf *m; ++ int len; ++#ifdef _WIN32 ++ unsigned long n; ++#else ++ int n; ++#endif ++ ++ if (ioctlsocket(so->s, FIONREAD, &n) != 0) { ++ DEBUG_MISC(" ioctlsocket errno = %d-%s\n", errno, strerror(errno)); ++ return; ++ } ++ ++ m = m_get(so->slirp); ++ if (!m) { ++ return; ++ } ++ switch (so->so_ffamily) { ++ case AF_INET: ++ m->m_data += IF_MAXLINKHDR + sizeof(struct udpiphdr); ++ break; ++ case AF_INET6: ++ m->m_data += ++ IF_MAXLINKHDR + sizeof(struct ip6) + sizeof(struct udphdr); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ /* ++ * XXX Shouldn't FIONREAD packets destined for port 53, ++ * but I don't know the max packet size for DNS lookups ++ */ ++ len = M_FREEROOM(m); ++ /* if (so->so_fport != htons(53)) { */ ++ ++ if (n > len) { ++ n = (m->m_data - m->m_dat) + m->m_len + n + 1; ++ m_inc(m, n); ++ len = M_FREEROOM(m); ++ } ++ /* } */ ++ ++ m->m_len = recvfrom(so->s, 
m->m_data, len, 0, (struct sockaddr *)&addr, ++ &addrlen); ++ DEBUG_MISC(" did recvfrom %d, errno = %d-%s", m->m_len, errno, ++ strerror(errno)); ++ if (m->m_len < 0) { ++ /* Report error as ICMP */ ++ switch (so->so_lfamily) { ++ uint8_t code; ++ case AF_INET: ++ code = ICMP_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) { ++ code = ICMP_UNREACH_HOST; ++ } else if (errno == ENETUNREACH) { ++ code = ICMP_UNREACH_NET; ++ } ++ ++ DEBUG_MISC(" rx error, tx icmp ICMP_UNREACH:%i", code); ++ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, ++ strerror(errno)); ++ break; ++ case AF_INET6: ++ code = ICMP6_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) { ++ code = ICMP6_UNREACH_ADDRESS; ++ } else if (errno == ENETUNREACH) { ++ code = ICMP6_UNREACH_NO_ROUTE; ++ } ++ ++ DEBUG_MISC(" rx error, tx icmp6 ICMP_UNREACH:%i", code); ++ icmp6_send_error(so->so_m, ICMP6_UNREACH, code); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ m_free(m); ++ } else { ++ /* ++ * Hack: domain name lookup will be used the most for UDP, ++ * and since they'll only be used once there's no need ++ * for the 4 minute (or whatever) timeout... So we time them ++ * out much quicker (10 seconds for now...) ++ */ ++ if (so->so_expire) { ++ if (so->so_fport == htons(53)) ++ so->so_expire = curtime + SO_EXPIREFAST; ++ else ++ so->so_expire = curtime + SO_EXPIRE; ++ } ++ ++ /* ++ * If this packet was destined for CTL_ADDR, ++ * make it look like that's where it came from ++ */ ++ saddr = addr; ++ sotranslate_in(so, &saddr); ++ ++ /* Perform lazy guest IP address resolution if needed. 
*/ ++ if (so->so_state & SS_HOSTFWD) { ++ if (soassign_guest_addr_if_needed(so) < 0) { ++ DEBUG_MISC(" guest address not available yet"); ++ switch (so->so_lfamily) { ++ case AF_INET: ++ icmp_send_error(so->so_m, ICMP_UNREACH, ++ ICMP_UNREACH_HOST, 0, ++ "guest address not available yet"); ++ break; ++ case AF_INET6: ++ icmp6_send_error(so->so_m, ICMP6_UNREACH, ++ ICMP6_UNREACH_ADDRESS); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ m_free(m); ++ return; ++ } ++ } ++ daddr = so->lhost.ss; ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ udp_output(so, m, (struct sockaddr_in *)&saddr, ++ (struct sockaddr_in *)&daddr, so->so_iptos); ++ break; ++ case AF_INET6: ++ udp6_output(so, m, (struct sockaddr_in6 *)&saddr, ++ (struct sockaddr_in6 *)&daddr); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } /* rx error */ ++ } /* if ping packet */ ++} ++ ++/* ++ * sendto() a socket ++ */ ++int sosendto(struct socket *so, struct mbuf *m) ++{ ++ int ret; ++ struct sockaddr_storage addr; ++ ++ DEBUG_CALL("sosendto"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ addr = so->fhost.ss; ++ DEBUG_CALL(" sendto()ing)"); ++ if (sotranslate_out(so, &addr) < 0) { ++ return -1; ++ } ++ ++ /* Don't care what port we get */ ++ ret = sendto(so->s, m->m_data, m->m_len, 0, (struct sockaddr *)&addr, ++ sockaddr_size(&addr)); ++ if (ret < 0) ++ return -1; ++ ++ /* ++ * Kill the socket if there's no reply in 4 minutes, ++ * but only if it's an expirable socket ++ */ ++ if (so->so_expire) ++ so->so_expire = curtime + SO_EXPIRE; ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */ ++ return 0; ++} ++ ++/* ++ * Listen for incoming TCP connections ++ * On failure errno contains the reason. 
++ */ ++struct socket *tcpx_listen(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags) ++{ ++ struct socket *so; ++ int s, opt = 1; ++ socklen_t addrlen; ++ ++ DEBUG_CALL("tcpx_listen"); ++ /* AF_INET6 addresses are bigger than AF_INET, so this is big enough. */ ++ char addrstr[INET6_ADDRSTRLEN]; ++ char portstr[6]; ++ int ret; ++ ret = getnameinfo(haddr, haddrlen, addrstr, sizeof(addrstr), portstr, sizeof(portstr), NI_NUMERICHOST|NI_NUMERICSERV); ++ g_assert(ret == 0); ++ DEBUG_ARG("haddr = %s", addrstr); ++ DEBUG_ARG("hport = %s", portstr); ++ ret = getnameinfo(laddr, laddrlen, addrstr, sizeof(addrstr), portstr, sizeof(portstr), NI_NUMERICHOST|NI_NUMERICSERV); ++ g_assert(ret == 0); ++ DEBUG_ARG("laddr = %s", addrstr); ++ DEBUG_ARG("lport = %s", portstr); ++ DEBUG_ARG("flags = %x", flags); ++ ++ /* ++ * SS_HOSTFWD sockets can be accepted multiple times, so they can't be ++ * SS_FACCEPTONCE. Also, SS_HOSTFWD connections can be accepted and ++ * immediately closed if the guest address isn't available yet, which is ++ * incompatible with the "accept once" concept. Correct code will never ++ * request both, so disallow their combination by assertion. ++ */ ++ g_assert(!((flags & SS_HOSTFWD) && (flags & SS_FACCEPTONCE))); ++ ++ so = socreate(slirp); ++ ++ /* Don't tcp_attach... we don't need so_snd nor so_rcv */ ++ so->so_tcpcb = tcp_newtcpcb(so); ++ insque(so, &slirp->tcb); ++ ++ /* ++ * SS_FACCEPTONCE sockets must time out. 
++ */ ++ if (flags & SS_FACCEPTONCE) ++ so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT * 2; ++ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= (SS_FACCEPTCONN | flags); ++ ++ sockaddr_copy(&so->lhost.sa, sizeof(so->lhost), laddr, laddrlen); ++ ++ s = slirp_socket(haddr->sa_family, SOCK_STREAM, 0); ++ if ((s < 0) || ++ (haddr->sa_family == AF_INET6 && slirp_socket_set_v6only(s, (flags & SS_HOSTFWD_V6ONLY) != 0) < 0) || ++ (slirp_socket_set_fast_reuse(s) < 0) || ++ (bind(s, haddr, haddrlen) < 0) || ++ (listen(s, 1) < 0)) { ++ int tmperrno = errno; /* Don't clobber the real reason we failed */ ++ if (s >= 0) { ++ closesocket(s); ++ } ++ sofree(so); ++ /* Restore the real errno */ ++#ifdef _WIN32 ++ WSASetLastError(tmperrno); ++#else ++ errno = tmperrno; ++#endif ++ return NULL; ++ } ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_socket_set_nodelay(s); ++ ++ addrlen = sizeof(so->fhost); ++ getsockname(s, &so->fhost.sa, &addrlen); ++ sotranslate_accept(so); ++ ++ so->s = s; ++ return so; ++} ++ ++struct socket *tcp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, ++ uint32_t laddr, unsigned lport, int flags) ++{ ++ struct sockaddr_in hsa, lsa; ++ ++ memset(&hsa, 0, sizeof(hsa)); ++ hsa.sin_family = AF_INET; ++ hsa.sin_addr.s_addr = haddr; ++ hsa.sin_port = hport; ++ ++ memset(&lsa, 0, sizeof(lsa)); ++ lsa.sin_family = AF_INET; ++ lsa.sin_addr.s_addr = laddr; ++ lsa.sin_port = lport; ++ ++ return tcpx_listen(slirp, (const struct sockaddr *) &hsa, sizeof(hsa), (struct sockaddr *) &lsa, sizeof(lsa), flags); ++} ++ ++/* ++ * Various session state calls ++ * XXX Should be #define's ++ * The socket state stuff needs work, these often get call 2 or 3 ++ * times each when only 1 was needed ++ */ ++void soisfconnecting(struct socket *so) ++{ ++ so->so_state &= ~(SS_NOFDREF | SS_ISFCONNECTED | SS_FCANTRCVMORE | ++ SS_FCANTSENDMORE | SS_FWDRAIN); ++ so->so_state |= SS_ISFCONNECTING; /* Clobber other states */ ++} ++ ++void 
soisfconnected(struct socket *so) ++{ ++ so->so_state &= ~(SS_ISFCONNECTING | SS_FWDRAIN | SS_NOFDREF); ++ so->so_state |= SS_ISFCONNECTED; /* Clobber other states */ ++} ++ ++static void sofcantrcvmore(struct socket *so) ++{ ++ if ((so->so_state & SS_NOFDREF) == 0) { ++ shutdown(so->s, 0); ++ } ++ so->so_state &= ~(SS_ISFCONNECTING); ++ if (so->so_state & SS_FCANTSENDMORE) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* Don't select it */ ++ } else { ++ so->so_state |= SS_FCANTRCVMORE; ++ } ++} ++ ++static void sofcantsendmore(struct socket *so) ++{ ++ if ((so->so_state & SS_NOFDREF) == 0) { ++ shutdown(so->s, 1); /* send FIN to fhost */ ++ } ++ so->so_state &= ~(SS_ISFCONNECTING); ++ if (so->so_state & SS_FCANTRCVMORE) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* as above */ ++ } else { ++ so->so_state |= SS_FCANTSENDMORE; ++ } ++} ++ ++/* ++ * Set write drain mode ++ * Set CANTSENDMORE once all data has been write()n ++ */ ++void sofwdrain(struct socket *so) ++{ ++ if (so->so_rcv.sb_cc) ++ so->so_state |= SS_FWDRAIN; ++ else ++ sofcantsendmore(so); ++} ++ ++static bool sotranslate_out4(Slirp *s, struct socket *so, struct sockaddr_in *sin) ++{ ++ if (!s->disable_dns && so->so_faddr.s_addr == s->vnameserver_addr.s_addr) { ++ return so->so_fport == htons(53) && get_dns_addr(&sin->sin_addr) >= 0; ++ } ++ ++ if (so->so_faddr.s_addr == s->vhost_addr.s_addr || ++ so->so_faddr.s_addr == 0xffffffff) { ++ if (s->disable_host_loopback) { ++ return false; ++ } ++ ++ sin->sin_addr = loopback_addr; ++ } ++ ++ return true; ++} ++ ++static bool sotranslate_out6(Slirp *s, struct socket *so, struct sockaddr_in6 *sin) ++{ ++ if (!s->disable_dns && in6_equal(&so->so_faddr6, &s->vnameserver_addr6)) { ++ uint32_t scope_id; ++ if (so->so_fport == htons(53) && get_dns6_addr(&sin->sin6_addr, &scope_id) >= 0) { ++ sin->sin6_scope_id = scope_id; ++ return true; ++ } ++ return false; ++ } ++ ++ if (in6_equal_net(&so->so_faddr6, 
&s->vprefix_addr6, s->vprefix_len) || ++ in6_equal(&so->so_faddr6, &(struct in6_addr)ALLNODES_MULTICAST)) { ++ if (s->disable_host_loopback) { ++ return false; ++ } ++ ++ sin->sin6_addr = in6addr_loopback; ++ } ++ ++ return true; ++} ++ ++ ++/* ++ * Translate addr in host addr when it is a virtual address ++ */ ++int sotranslate_out(struct socket *so, struct sockaddr_storage *addr) ++{ ++ bool ok = true; ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ ok = sotranslate_out4(so->slirp, so, (struct sockaddr_in *)addr); ++ break; ++ case AF_INET6: ++ ok = sotranslate_out6(so->slirp, so, (struct sockaddr_in6 *)addr); ++ break; ++ } ++ ++ if (!ok) { ++ errno = EPERM; ++ return -1; ++ } ++ ++ return 0; ++} ++ ++void sotranslate_in(struct socket *so, struct sockaddr_storage *addr) ++{ ++ Slirp *slirp = so->slirp; ++ struct sockaddr_in *sin = (struct sockaddr_in *)addr; ++ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr; ++ ++ if ((so->so_faddr.s_addr & inv_mask) == inv_mask) { ++ sin->sin_addr = slirp->vhost_addr; ++ } else if (sin->sin_addr.s_addr == loopback_addr.s_addr || ++ so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { ++ sin->sin_addr = so->so_faddr; ++ } ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6, ++ slirp->vprefix_len)) { ++ if (in6_equal(&sin6->sin6_addr, &in6addr_loopback) || ++ !in6_equal(&so->so_faddr6, &slirp->vhost_addr6)) { ++ sin6->sin6_addr = so->so_faddr6; ++ } ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* ++ * Translate connections from localhost to the real hostname ++ */ ++void sotranslate_accept(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ if (so->so_faddr.s_addr == INADDR_ANY || ++ (so->so_faddr.s_addr & loopback_mask) 
== ++ (loopback_addr.s_addr & loopback_mask)) { ++ so->so_faddr = slirp->vhost_addr; ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_equal(&so->so_faddr6, &in6addr_any) || ++ in6_equal(&so->so_faddr6, &in6addr_loopback)) { ++ so->so_faddr6 = slirp->vhost_addr6; ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++void sodrop(struct socket *s, int num) ++{ ++ if (sbdrop(&s->so_snd, num)) { ++ s->slirp->cb->notify(s->slirp->opaque); ++ } ++} ++ ++/* ++ * Translate "addr-any" in so->lhost to the guest's actual address. ++ * Returns 0 for success, or -1 if the guest doesn't have an address yet ++ * with errno set to EHOSTUNREACH. ++ * ++ * The guest address is taken from the first entry in the ARP table for IPv4 ++ * and the first entry in the NDP table for IPv6. ++ * Note: The IPv4 path isn't exercised yet as all hostfwd "" guest translations ++ * are handled immediately by using slirp->vdhcp_startaddr. ++ */ ++int soassign_guest_addr_if_needed(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ /* AF_INET6 addresses are bigger than AF_INET, so this is big enough. 
*/ ++ char addrstr[INET6_ADDRSTRLEN]; ++ char portstr[6]; ++ ++ g_assert(so->so_state & SS_HOSTFWD); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ if (so->so_laddr.s_addr == INADDR_ANY) { ++ g_assert_not_reached(); ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_zero(&so->so_laddr6)) { ++ int ret; ++ if (in6_zero(&slirp->ndp_table.guest_in6_addr)) { ++ errno = EHOSTUNREACH; ++ return -1; ++ } ++ so->so_laddr6 = slirp->ndp_table.guest_in6_addr; ++ ret = getnameinfo((const struct sockaddr *) &so->lhost.ss, ++ sizeof(so->lhost.ss), addrstr, sizeof(addrstr), ++ portstr, sizeof(portstr), ++ NI_NUMERICHOST|NI_NUMERICSERV); ++ g_assert(ret == 0); ++ DEBUG_MISC("%s: new ip = [%s]:%s", __func__, addrstr, portstr); ++ } ++ break; ++ ++ default: ++ break; ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/socket.h b/slirp/src/socket.h +new file mode 100644 +index 0000000000..a73175dc29 +--- /dev/null ++++ b/slirp/src/socket.h +@@ -0,0 +1,186 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef SLIRP_SOCKET_H ++#define SLIRP_SOCKET_H ++ ++#include "misc.h" ++#include "sbuf.h" ++ ++#define SO_EXPIRE 240000 ++#define SO_EXPIREFAST 10000 ++ ++/* Helps unify some in/in6 routines. 
*/ ++union in4or6_addr { ++ struct in_addr addr4; ++ struct in6_addr addr6; ++}; ++typedef union in4or6_addr in4or6_addr; ++ ++/* ++ * Our socket structure ++ */ ++ ++union slirp_sockaddr { ++ struct sockaddr sa; ++ struct sockaddr_storage ss; ++ struct sockaddr_in sin; ++ struct sockaddr_in6 sin6; ++}; ++ ++struct socket { ++ struct socket *so_next, *so_prev; /* For a linked list of sockets */ ++ ++ int s; /* The actual socket */ ++ struct gfwd_list *guestfwd; ++ ++ int pollfds_idx; /* GPollFD GArray index */ ++ ++ Slirp *slirp; /* managing slirp instance */ ++ ++ /* XXX union these with not-yet-used sbuf params */ ++ struct mbuf *so_m; /* Pointer to the original SYN packet, ++ * for non-blocking connect()'s, and ++ * PING reply's */ ++ struct tcpiphdr *so_ti; /* Pointer to the original ti within ++ * so_mconn, for non-blocking connections */ ++ uint32_t so_urgc; ++ union slirp_sockaddr fhost; /* Foreign host */ ++#define so_faddr fhost.sin.sin_addr ++#define so_fport fhost.sin.sin_port ++#define so_faddr6 fhost.sin6.sin6_addr ++#define so_fport6 fhost.sin6.sin6_port ++#define so_ffamily fhost.ss.ss_family ++ ++ union slirp_sockaddr lhost; /* Local host */ ++#define so_laddr lhost.sin.sin_addr ++#define so_lport lhost.sin.sin_port ++#define so_laddr6 lhost.sin6.sin6_addr ++#define so_lport6 lhost.sin6.sin6_port ++#define so_lfamily lhost.ss.ss_family ++ ++ uint8_t so_iptos; /* Type of service */ ++ uint8_t so_emu; /* Is the socket emulated? 
*/ ++ ++ uint8_t so_type; /* Type of socket, UDP or TCP */ ++ int32_t so_state; /* internal state flags SS_*, below */ ++ ++ struct tcpcb *so_tcpcb; /* pointer to TCP protocol control block */ ++ unsigned so_expire; /* When the socket will expire */ ++ ++ int so_queued; /* Number of packets queued from this socket */ ++ int so_nqueued; /* Number of packets queued in a row ++ * Used to determine when to "downgrade" a session ++ * from fastq to batchq */ ++ ++ struct sbuf so_rcv; /* Receive buffer */ ++ struct sbuf so_snd; /* Send buffer */ ++}; ++ ++ ++/* ++ * Socket state bits. (peer means the host on the Internet, ++ * local host means the host on the other end of the modem) ++ */ ++#define SS_NOFDREF 0x001 /* No fd reference */ ++ ++#define SS_ISFCONNECTING \ ++ 0x002 /* Socket is connecting to peer (non-blocking connect()'s) */ ++#define SS_ISFCONNECTED 0x004 /* Socket is connected to peer */ ++#define SS_FCANTRCVMORE \ ++ 0x008 /* Socket can't receive more from peer (for half-closes) */ ++#define SS_FCANTSENDMORE \ ++ 0x010 /* Socket can't send more to peer (for half-closes) */ ++#define SS_FWDRAIN \ ++ 0x040 /* We received a FIN, drain data and set SS_FCANTSENDMORE */ ++ ++#define SS_CTL 0x080 ++#define SS_FACCEPTCONN \ ++ 0x100 /* Socket is accepting connections from a host on the internet */ ++#define SS_FACCEPTONCE \ ++ 0x200 /* If set, the SS_FACCEPTCONN socket will die after one accept */ ++ ++#define SS_PERSISTENT_MASK 0xf000 /* Unremovable state bits */ ++#define SS_HOSTFWD 0x1000 /* Socket describes host->guest forwarding */ ++#define SS_INCOMING \ ++ 0x2000 /* Connection was initiated by a host on the internet */ ++#define SS_HOSTFWD_V6ONLY 0x4000 /* Only bind on v6 addresses */ ++ ++static inline int sockaddr_equal(const struct sockaddr_storage *a, ++ const struct sockaddr_storage *b) ++{ ++ if (a->ss_family != b->ss_family) { ++ return 0; ++ } ++ ++ switch (a->ss_family) { ++ case AF_INET: { ++ const struct sockaddr_in *a4 = (const struct 
sockaddr_in *)a; ++ const struct sockaddr_in *b4 = (const struct sockaddr_in *)b; ++ return a4->sin_addr.s_addr == b4->sin_addr.s_addr && ++ a4->sin_port == b4->sin_port; ++ } ++ case AF_INET6: { ++ const struct sockaddr_in6 *a6 = (const struct sockaddr_in6 *)a; ++ const struct sockaddr_in6 *b6 = (const struct sockaddr_in6 *)b; ++ return (in6_equal(&a6->sin6_addr, &b6->sin6_addr) && ++ a6->sin6_port == b6->sin6_port); ++ } ++ default: ++ g_assert_not_reached(); ++ } ++ ++ return 0; ++} ++ ++static inline socklen_t sockaddr_size(const struct sockaddr_storage *a) ++{ ++ switch (a->ss_family) { ++ case AF_INET: ++ return sizeof(struct sockaddr_in); ++ case AF_INET6: ++ return sizeof(struct sockaddr_in6); ++ default: ++ g_assert_not_reached(); ++ } ++} ++ ++static inline void sockaddr_copy(struct sockaddr *dst, socklen_t dstlen, const struct sockaddr *src, socklen_t srclen) ++{ ++ socklen_t len = sockaddr_size((const struct sockaddr_storage *) src); ++ g_assert(len <= srclen); ++ g_assert(len <= dstlen); ++ memcpy(dst, src, len); ++} ++ ++struct socket *solookup(struct socket **, struct socket *, ++ struct sockaddr_storage *, struct sockaddr_storage *); ++struct socket *socreate(Slirp *); ++void sofree(struct socket *); ++int soread(struct socket *); ++int sorecvoob(struct socket *); ++int sosendoob(struct socket *); ++int sowrite(struct socket *); ++void sorecvfrom(struct socket *); ++int sosendto(struct socket *, struct mbuf *); ++struct socket *tcp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); ++struct socket *tcpx_listen(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags); ++void soisfconnecting(register struct socket *); ++void soisfconnected(register struct socket *); ++void sofwdrain(struct socket *); ++struct iovec; /* For win32 */ ++size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np); ++int soreadbuf(struct socket *so, const char *buf, int size); ++ 
++int sotranslate_out(struct socket *, struct sockaddr_storage *); ++void sotranslate_in(struct socket *, struct sockaddr_storage *); ++void sotranslate_accept(struct socket *); ++void sodrop(struct socket *, int num); ++int soassign_guest_addr_if_needed(struct socket *so); ++ ++#endif /* SLIRP_SOCKET_H */ +diff --git a/slirp/src/state.c b/slirp/src/state.c +new file mode 100644 +index 0000000000..22af77b256 +--- /dev/null ++++ b/slirp/src/state.c +@@ -0,0 +1,379 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp ++ * ++ * Copyright (c) 2004-2008 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
#include "slirp.h"
#include "vmstate.h"
#include "stream.h"

/*
 * post_load hook of the "slirp-tcp" section: opaque is the struct tcpcb
 * that was just loaded; tcp_template() is run on it.  Always returns 0.
 */
static int slirp_tcp_post_load(void *opaque, int version)
{
    tcp_template((struct tcpcb *)opaque);

    return 0;
}

/*
 * Migration layout of a TCP control block (struct tcpcb).
 * The field list defines the order in which the members are transferred,
 * so entries must not be reordered.
 */
static const VMStateDescription vmstate_slirp_tcp = {
    .name = "slirp-tcp",
    .version_id = 0,
    .post_load = slirp_tcp_post_load,
    .fields = (VMStateField[]){ VMSTATE_INT16(t_state, struct tcpcb),
                                VMSTATE_INT16_ARRAY(t_timer, struct tcpcb,
                                                    TCPT_NTIMERS),
                                VMSTATE_INT16(t_rxtshift, struct tcpcb),
                                VMSTATE_INT16(t_rxtcur, struct tcpcb),
                                VMSTATE_INT16(t_dupacks, struct tcpcb),
                                VMSTATE_UINT16(t_maxseg, struct tcpcb),
                                VMSTATE_UINT8(t_force, struct tcpcb),
                                VMSTATE_UINT16(t_flags, struct tcpcb),
                                VMSTATE_UINT32(snd_una, struct tcpcb),
                                VMSTATE_UINT32(snd_nxt, struct tcpcb),
                                VMSTATE_UINT32(snd_up, struct tcpcb),
                                VMSTATE_UINT32(snd_wl1, struct tcpcb),
                                VMSTATE_UINT32(snd_wl2, struct tcpcb),
                                VMSTATE_UINT32(iss, struct tcpcb),
                                VMSTATE_UINT32(snd_wnd, struct tcpcb),
                                VMSTATE_UINT32(rcv_wnd, struct tcpcb),
                                VMSTATE_UINT32(rcv_nxt, struct tcpcb),
                                VMSTATE_UINT32(rcv_up, struct tcpcb),
                                VMSTATE_UINT32(irs, struct tcpcb),
                                VMSTATE_UINT32(rcv_adv, struct tcpcb),
                                VMSTATE_UINT32(snd_max, struct tcpcb),
                                VMSTATE_UINT32(snd_cwnd, struct tcpcb),
                                VMSTATE_UINT32(snd_ssthresh, struct tcpcb),
                                VMSTATE_INT16(t_idle, struct tcpcb),
                                VMSTATE_INT16(t_rtt, struct tcpcb),
                                VMSTATE_UINT32(t_rtseq, struct tcpcb),
                                VMSTATE_INT16(t_srtt, struct tcpcb),
                                VMSTATE_INT16(t_rttvar, struct tcpcb),
                                VMSTATE_UINT16(t_rttmin, struct tcpcb),
                                VMSTATE_UINT32(max_sndwnd, struct tcpcb),
                                VMSTATE_UINT8(t_oobflags, struct tcpcb),
                                VMSTATE_UINT8(t_iobc, struct tcpcb),
                                VMSTATE_INT16(t_softerror, struct tcpcb),
                                VMSTATE_UINT8(snd_scale, struct tcpcb),
                                VMSTATE_UINT8(rcv_scale, struct tcpcb),
                                VMSTATE_UINT8(request_r_scale, struct tcpcb),
                                VMSTATE_UINT8(requested_s_scale, struct tcpcb),
                                VMSTATE_UINT32(ts_recent, struct tcpcb),
                                VMSTATE_UINT32(ts_recent_age, struct tcpcb),
                                VMSTATE_UINT32(last_ack_sent, struct tcpcb),
                                VMSTATE_END_OF_LIST() }
};

/* The sbuf has a pair of pointers that are migrated as offsets;
 * we calculate the offsets and restore the pointers using
 * pre_save/post_load on a tmp structure.
 */
struct sbuf_tmp {
    struct sbuf *parent; /* the sbuf whose pointers are being converted */
    uint32_t roff, woff; /* read / write offsets relative to sb_data */
};

/*
 * pre_save hook: turn the parent's sb_wptr / sb_rptr into offsets from
 * sb_data so they can be written as plain integers.  Always returns 0.
 */
static int sbuf_tmp_pre_save(void *opaque)
{
    struct sbuf_tmp *tmp = opaque;
    tmp->woff = tmp->parent->sb_wptr - tmp->parent->sb_data;
    tmp->roff = tmp->parent->sb_rptr - tmp->parent->sb_data;

    return 0;
}

/*
 * post_load hook: reserve the parent's buffer via sbreserve() using the
 * sb_datalen that was loaded, then rebuild sb_wptr / sb_rptr from the
 * loaded offsets.
 *
 * Returns 0 on success, or -EINVAL (after logging through g_critical())
 * when either offset is not strictly below the loaded buffer length.
 */
static int sbuf_tmp_post_load(void *opaque, int version)
{
    struct sbuf_tmp *tmp = opaque;
    /* captured before sbreserve() is called on the parent */
    uint32_t requested_len = tmp->parent->sb_datalen;

    /* Allocate the buffer space used by the field after the tmp */
    sbreserve(tmp->parent, tmp->parent->sb_datalen);

    if (tmp->woff >= requested_len || tmp->roff >= requested_len) {
        g_critical("invalid sbuf offsets r/w=%u/%u len=%u", tmp->roff,
                   tmp->woff, requested_len);
        return -EINVAL;
    }

    tmp->parent->sb_wptr = tmp->parent->sb_data + tmp->woff;
    tmp->parent->sb_rptr = tmp->parent->sb_data + tmp->roff;

    return 0;
}


/* Wire layout of the temporary offsets: write offset first, then read. */
static const VMStateDescription vmstate_slirp_sbuf_tmp = {
    .name = "slirp-sbuf-tmp",
    .post_load = sbuf_tmp_post_load,
    .pre_save = sbuf_tmp_pre_save,
    .version_id = 0,
    .fields = (VMStateField[]){ VMSTATE_UINT32(woff, struct sbuf_tmp),
                                VMSTATE_UINT32(roff, struct sbuf_tmp),
                                VMSTATE_END_OF_LIST() }
};

/*
 * Migration layout of a socket buffer: byte count, buffer length, the
 * pointer offsets (through the tmp structure above) and finally the
 * buffer contents, sb_datalen bytes long.
 */
static const VMStateDescription vmstate_slirp_sbuf = {
    .name = "slirp-sbuf",
    .version_id = 0,
    .fields = (VMStateField[]){ VMSTATE_UINT32(sb_cc, struct sbuf),
                                VMSTATE_UINT32(sb_datalen, struct sbuf),
                                VMSTATE_WITH_TMP(struct sbuf, struct sbuf_tmp,
                                                 vmstate_slirp_sbuf_tmp),
                                VMSTATE_VBUFFER_UINT32(sb_data, struct sbuf, 0,
                                                       NULL, sb_datalen),
                                VMSTATE_END_OF_LIST() }
};

/* Field test: true when the stream being handled is older than version 4. */
static bool slirp_older_than_v4(void *opaque, int version_id)
{
    return version_id < 4;
}

/* Field test: true when the socket address in opaque is an AF_INET one. */
static bool slirp_family_inet(void *opaque, int version_id)
{
    union slirp_sockaddr *ssa = (union slirp_sockaddr *)opaque;
    return ssa->ss.ss_family == AF_INET;
}

/*
 * pre_load hook of the "slirp-socket" section: runs tcp_attach() on the
 * socket and presets both address families to AF_INET.  Always returns 0.
 */
static int slirp_socket_pre_load(void *opaque)
{
    struct socket *so = opaque;

    tcp_attach(so);
    /* Older versions don't load these fields */
    so->so_ffamily = AF_INET;
    so->so_lfamily = AF_INET;
    return 0;
}

/* Field macro for an IPv4 address member, transferred as 32 bits. */
#ifndef _WIN32
#define VMSTATE_SIN4_ADDR(f, s, t) VMSTATE_UINT32_TEST(f, s, t)
#else
/* Win uses u_long rather than uint32_t - but it's still 32bits long */
#define VMSTATE_SIN4_ADDR(f, s, t) \
    VMSTATE_SINGLE_TEST(f, s, t, 0, slirp_vmstate_info_uint32, u_long)
#endif

/* The OS provided ss_family field isn't that portable; its size
 * and type vary (16/8 bit, signed, unsigned)
 * and the values it contains aren't fully portable.
 */
typedef struct SS_FamilyTmpStruct {
    union slirp_sockaddr *parent; /* address whose family is converted */
    uint16_t portable_family;     /* one of the SS_FAMILY_MIG_* values */
} SS_FamilyTmpStruct;

#define SS_FAMILY_MIG_IPV4 2 /* Linux, BSD, Win... */
#define SS_FAMILY_MIG_IPV6 10 /* Linux */
#define SS_FAMILY_MIG_OTHER 0xffff

/*
 * pre_save hook: map the parent's native ss_family onto the portable
 * SS_FAMILY_MIG_* value that is written to the stream.  Families other
 * than AF_INET / AF_INET6 are stored as SS_FAMILY_MIG_OTHER.
 * Always returns 0.
 */
static int ss_family_pre_save(void *opaque)
{
    SS_FamilyTmpStruct *tss = opaque;

    tss->portable_family = SS_FAMILY_MIG_OTHER;

    if (tss->parent->ss.ss_family == AF_INET) {
        tss->portable_family = SS_FAMILY_MIG_IPV4;
    } else if (tss->parent->ss.ss_family == AF_INET6) {
        tss->portable_family = SS_FAMILY_MIG_IPV6;
    }

    return 0;
}

/*
 * post_load hook: map the portable family value read from the stream back
 * onto the native AF_* constant.
 *
 * Returns 0 on success, or -EINVAL (after logging through g_critical())
 * for any value that is not a recognised IPv4 / IPv6 encoding; that
 * includes SS_FAMILY_MIG_OTHER.
 */
static int ss_family_post_load(void *opaque, int version_id)
{
    SS_FamilyTmpStruct *tss = opaque;

    switch (tss->portable_family) {
    case SS_FAMILY_MIG_IPV4:
        tss->parent->ss.ss_family = AF_INET;
        break;
    case SS_FAMILY_MIG_IPV6:
    case 23: /* compatibility: AF_INET6 from mingw */
    case 28: /* compatibility: AF_INET6 from FreeBSD sys/socket.h */
        tss->parent->ss.ss_family = AF_INET6;
        break;
    default:
        g_critical("invalid ss_family type %x", tss->portable_family);
        return -EINVAL;
    }

    return 0;
}

/* Wire layout of the portable address family: one 16-bit value. */
static const VMStateDescription vmstate_slirp_ss_family = {
    .name = "slirp-socket-addr/ss_family",
    .pre_save = ss_family_pre_save,
    .post_load = ss_family_post_load,
    .fields =
        (VMStateField[]){ VMSTATE_UINT16(portable_family, SS_FamilyTmpStruct),
                          VMSTATE_END_OF_LIST() }
};

/*
 * Migration layout of a socket address: the portable family, followed by
 * the IPv4 address and port when the family is AF_INET.  The IPv6 members
 * are compiled out below, so nothing beyond the family is transferred for
 * an AF_INET6 address.
 */
static const VMStateDescription vmstate_slirp_socket_addr = {
    .name = "slirp-socket-addr",
    .version_id = 4,
    .fields =
        (VMStateField[]){
            VMSTATE_WITH_TMP(union slirp_sockaddr, SS_FamilyTmpStruct,
                             vmstate_slirp_ss_family),
            VMSTATE_SIN4_ADDR(sin.sin_addr.s_addr, union slirp_sockaddr,
                              slirp_family_inet),
            VMSTATE_UINT16_TEST(sin.sin_port, union slirp_sockaddr,
                                slirp_family_inet),

#if 0
            /* Untested: Needs checking by someone with IPv6 test */
            VMSTATE_BUFFER_TEST(sin6.sin6_addr, union slirp_sockaddr,
                                slirp_family_inet6),
            VMSTATE_UINT16_TEST(sin6.sin6_port, union slirp_sockaddr,
                                slirp_family_inet6),
            VMSTATE_UINT32_TEST(sin6.sin6_flowinfo, union slirp_sockaddr,
                                slirp_family_inet6),
            VMSTATE_UINT32_TEST(sin6.sin6_scope_id, union slirp_sockaddr,
                                slirp_family_inet6),
#endif

            VMSTATE_END_OF_LIST() }
};

/*
 * Migration layout of a struct socket.
 * Streams older than version 4 carry bare IPv4 address/port members;
 * version 4 and newer carry the full fhost / lhost address sections.
 */
static const VMStateDescription vmstate_slirp_socket = {
    .name = "slirp-socket",
    .version_id = 4,
    .pre_load = slirp_socket_pre_load,
    .fields =
        (VMStateField[]){
            VMSTATE_UINT32(so_urgc, struct socket),
            /* Pre-v4 versions */
            VMSTATE_SIN4_ADDR(so_faddr.s_addr, struct socket,
                              slirp_older_than_v4),
            VMSTATE_SIN4_ADDR(so_laddr.s_addr, struct socket,
                              slirp_older_than_v4),
            VMSTATE_UINT16_TEST(so_fport, struct socket, slirp_older_than_v4),
            VMSTATE_UINT16_TEST(so_lport, struct socket, slirp_older_than_v4),
            /* v4 and newer */
            VMSTATE_STRUCT(fhost, struct socket, 4, vmstate_slirp_socket_addr,
                           union slirp_sockaddr),
            VMSTATE_STRUCT(lhost, struct socket, 4, vmstate_slirp_socket_addr,
                           union slirp_sockaddr),

            VMSTATE_UINT8(so_iptos, struct socket),
            VMSTATE_UINT8(so_emu, struct socket),
            VMSTATE_UINT8(so_type, struct socket),
            VMSTATE_INT32(so_state, struct socket),
            VMSTATE_STRUCT(so_rcv, struct socket, 0, vmstate_slirp_sbuf,
                           struct sbuf),
            VMSTATE_STRUCT(so_snd, struct socket, 0, vmstate_slirp_sbuf,
                           struct sbuf),
            VMSTATE_STRUCT_POINTER(so_tcpcb, struct socket, vmstate_slirp_tcp,
                                   struct tcpcb),
            VMSTATE_END_OF_LIST() }
};

/* Migration layout of one BOOTP client slot. */
static const VMStateDescription vmstate_slirp_bootp_client = {
    .name = "slirp_bootpclient",
    .fields = (VMStateField[]){ VMSTATE_UINT16(allocated, BOOTPClient),
                                VMSTATE_BUFFER(macaddr, BOOTPClient),
                                VMSTATE_END_OF_LIST() }
};

/* Migration layout of the per-instance Slirp state. */
static const VMStateDescription vmstate_slirp = {
    .name = "slirp",
    .version_id = 4,
    .fields = (VMStateField[]){ VMSTATE_UINT16_V(ip_id, Slirp, 2),
                                VMSTATE_STRUCT_ARRAY(
                                    bootp_clients, Slirp, NB_BOOTP_CLIENTS, 3,
                                    vmstate_slirp_bootp_client, BOOTPClient),
                                VMSTATE_END_OF_LIST() }
};

/*
 * Serialize the state of a slirp instance through write_cb.
 *
 * For every guest-forward entry that has a write callback and for which
 * slirp_find_ctl_socket() finds a socket, a marker byte (42) is written
 * followed by that socket's state.  A zero byte ends the socket list and
 * the instance-wide state follows it.
 *
 * The return values of the write helpers are not checked here.
 */
void slirp_state_save(Slirp *slirp, SlirpWriteCb write_cb, void *opaque)
{
    struct gfwd_list *ex_ptr;
    SlirpOStream f = {
        .write_cb = write_cb,
        .opaque = opaque,
    };

    for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next)
        if (ex_ptr->write_cb) {
            struct socket *so;
            so = slirp_find_ctl_socket(slirp, ex_ptr->ex_addr,
                                       ntohs(ex_ptr->ex_fport));
            if (!so) {
                continue;
            }

            slirp_ostream_write_u8(&f, 42); /* "a socket follows" marker */
            slirp_vmstate_save_state(&f, &vmstate_slirp_socket, so);
        }
    slirp_ostream_write_u8(&f, 0); /* end of the socket list */

    slirp_vmstate_save_state(&f, &vmstate_slirp, slirp);
}


/*
 * Restore the state of a slirp instance through read_cb.
 *
 * Sockets are read for as long as the leading marker byte is non-zero.
 * Each loaded socket must have a foreign IPv4 address inside the virtual
 * network and must match (address and port) a guest-forward entry that
 * has a write callback; that entry is then linked as so->guestfwd.
 * The instance-wide state is loaded last.
 *
 * Returns the result of the final slirp_vmstate_load_state() call, the
 * negative result of a failed socket load, or -EINVAL when a socket does
 * not pass the checks above.
 *
 * NOTE(review): the error returns inside the loop do not release `so`
 * here - confirm whether it is reclaimed elsewhere.
 */
int slirp_state_load(Slirp *slirp, int version_id, SlirpReadCb read_cb,
                     void *opaque)
{
    struct gfwd_list *ex_ptr;
    SlirpIStream f = {
        .read_cb = read_cb,
        .opaque = opaque,
    };

    /* slirp_istream_read_u8() also yields 0 when the read itself fails */
    while (slirp_istream_read_u8(&f)) {
        int ret;
        struct socket *so = socreate(slirp);

        ret =
            slirp_vmstate_load_state(&f, &vmstate_slirp_socket, so, version_id);
        if (ret < 0) {
            return ret;
        }

        /* the foreign address must lie inside the virtual network */
        if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) !=
            slirp->vnetwork_addr.s_addr) {
            return -EINVAL;
        }
        /* look for the guest-forward entry this socket belongs to */
        for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) {
            if (ex_ptr->write_cb &&
                so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr &&
                so->so_fport == ex_ptr->ex_fport) {
                break;
            }
        }
        if (!ex_ptr) {
            return -EINVAL;
        }

        so->guestfwd = ex_ptr;
    }

    return slirp_vmstate_load_state(&f, &vmstate_slirp, slirp, version_id);
}

/* Version number of the state format handled by this file (currently 4). */
int slirp_state_version(void)
{
    return 4;
}
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "stream.h" ++#include ++ ++bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size) ++{ ++ return f->read_cb(buf, size, f->opaque) == size; ++} ++ ++bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size) ++{ ++ return f->write_cb(buf, size, f->opaque) == size; ++} ++ ++uint8_t slirp_istream_read_u8(SlirpIStream *f) ++{ ++ uint8_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return b; ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b) ++{ ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++uint16_t slirp_istream_read_u16(SlirpIStream *f) ++{ ++ uint16_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GUINT16_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b) ++{ ++ b = GUINT16_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++uint32_t slirp_istream_read_u32(SlirpIStream *f) ++{ ++ uint32_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GUINT32_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b) ++{ ++ b = GUINT32_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++int16_t slirp_istream_read_i16(SlirpIStream *f) ++{ ++ int16_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GINT16_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b) ++{ ++ b = GINT16_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++int32_t slirp_istream_read_i32(SlirpIStream *f) ++{ ++ int32_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GINT32_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b) ++{ ++ b = GINT32_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} +diff --git a/slirp/src/stream.h b/slirp/src/stream.h +new file mode 100644 +index 0000000000..08bb5b6610 +--- /dev/null ++++ 
/* SPDX-License-Identifier: BSD-3-Clause */
#ifndef STREAM_H_
#define STREAM_H_

#include "libslirp.h"

/*
 * Input stream: a read callback plus the opaque pointer passed to it.
 * (SlirpReadCb is not declared in this header - presumably it comes from
 * libslirp.h.)
 */
typedef struct SlirpIStream {
    SlirpReadCb read_cb;
    void *opaque;
} SlirpIStream;

/*
 * Output stream: a write callback plus the opaque pointer passed to it.
 * (SlirpWriteCb is not declared in this header - presumably it comes from
 * libslirp.h.)
 */
typedef struct SlirpOStream {
    SlirpWriteCb write_cb;
    void *opaque;
} SlirpOStream;

/* Raw transfers of size bytes; the bool result reports success. */
bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size);
bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size);

/*
 * Typed helpers for 8/16/32-bit integers.  Readers return the value read;
 * writers return a bool success indicator.
 */
uint8_t slirp_istream_read_u8(SlirpIStream *f);
bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b);

uint16_t slirp_istream_read_u16(SlirpIStream *f);
bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b);

uint32_t slirp_istream_read_u32(SlirpIStream *f);
bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b);

int16_t slirp_istream_read_i16(SlirpIStream *f);
bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b);

int32_t slirp_istream_read_i32(SlirpIStream *f);
bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b);

#endif /* STREAM_H_ */
/* TCP sequence numbers are 32-bit unsigned values. */
typedef uint32_t tcp_seq;

#define PR_SLOWHZ 2 /* 2 slow timeouts per second (approx) */
#define PR_FASTHZ 5 /* 5 fast timeouts per second (not important) */

/*
 * Send / receive buffer space in bytes (128 KiB each).
 * The products are parenthesised so the macros stay a single operand when
 * used next to '/', '%' or other operators in a larger expression.
 */
#define TCP_SNDSPACE (1024 * 128)
#define TCP_RCVSPACE (1024 * 128)
#define TCP_MAXSEG_MAX 32768
/*
 * TCP header.
 * Per RFC 793, September, 1981.
 */
/* Every use of the tag "tcphdr" below really names struct slirp_tcphdr. */
#define tcphdr slirp_tcphdr
struct tcphdr {
    uint16_t th_sport; /* source port */
    uint16_t th_dport; /* destination port */
    tcp_seq th_seq; /* sequence number */
    tcp_seq th_ack; /* acknowledgement number */
    /* the two 4-bit fields are declared in byte-order dependent order */
#if G_BYTE_ORDER == G_BIG_ENDIAN
    uint8_t th_off : 4, /* data offset */
        th_x2 : 4; /* (unused) */
#else
    uint8_t th_x2 : 4, /* (unused) */
        th_off : 4; /* data offset */
#endif
    uint8_t th_flags; /* TH_* bits, below */
    uint16_t th_win; /* window */
    uint16_t th_sum; /* checksum */
    uint16_t th_urp; /* urgent pointer */
};

#include "tcp_var.h"

/* Header flag bits (th_flags); defined only if TH_FIN is not defined yet. */
#ifndef TH_FIN
#define TH_FIN 0x01
#define TH_SYN 0x02
#define TH_RST 0x04
#define TH_PUSH 0x08
#define TH_ACK 0x10
#define TH_URG 0x20
#endif

/* TCP option kinds; defined only if TCPOPT_EOL is not defined yet. */
#ifndef TCPOPT_EOL
#define TCPOPT_EOL 0
#define TCPOPT_NOP 1
#define TCPOPT_MAXSEG 2
#define TCPOPT_WINDOW 3
#define TCPOPT_SACK_PERMITTED 4 /* Experimental */
#define TCPOPT_SACK 5 /* Experimental */
#define TCPOPT_TIMESTAMP 8

/* NOP, NOP, TIMESTAMP kind and TIMESTAMP length packed into 32 bits */
#define TCPOPT_TSTAMP_HDR                                           \
    (TCPOPT_NOP << 24 | TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 | \
     TCPOLEN_TIMESTAMP)
#endif

/* TCP option lengths; defined only if TCPOLEN_MAXSEG is not defined yet. */
#ifndef TCPOLEN_MAXSEG
#define TCPOLEN_MAXSEG 4
#define TCPOLEN_WINDOW 3
#define TCPOLEN_SACK_PERMITTED 2
#define TCPOLEN_TIMESTAMP 10
#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP + 2) /* appendix A */
#endif

#undef TCP_MAXWIN
#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */

#undef TCP_MAX_WINSHIFT
#define TCP_MAX_WINSHIFT 14 /* maximum window shift */

/*
 * User-settable options (used with setsockopt).
 *
 * We don't use the system headers on unix because we have conflicting
 * local structures. We can't avoid the system definitions on Windows,
 * so we undefine them.
 */
#undef TCP_NODELAY
#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */
#undef TCP_MAXSEG
/*
 * TCP FSM state definitions.
 * Per RFC793, September, 1981.
 */

#define TCP_NSTATES 11

#define TCPS_CLOSED 0 /* closed */
#define TCPS_LISTEN 1 /* listening for connection */
#define TCPS_SYN_SENT 2 /* active, have sent syn */
#define TCPS_SYN_RECEIVED 3 /* have sent and received syn */
/* states < TCPS_ESTABLISHED are those where connections not established */
#define TCPS_ESTABLISHED 4 /* established */
#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */
/* states > TCPS_CLOSE_WAIT are those where user has closed */
#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */
#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */
#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */
/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */
#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */
#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */

/* State predicates: plain ordering tests on the state numbers above. */
#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED)
#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED)
#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT)

/*
 * TCP sequence numbers are 32 bit integers operated
 * on with modular arithmetic.  These macros can be
 * used to compare such integers.
 *
 * The difference is taken in uint32_t and reinterpreted as int32_t, so
 * the comparison is a 32-bit one whatever the width of plain int is.
 */
#define SEQ_LT(a, b) ((int32_t)((uint32_t)(a) - (uint32_t)(b)) < 0)
#define SEQ_LEQ(a, b) ((int32_t)((uint32_t)(a) - (uint32_t)(b)) <= 0)
#define SEQ_GT(a, b) ((int32_t)((uint32_t)(a) - (uint32_t)(b)) > 0)
#define SEQ_GEQ(a, b) ((int32_t)((uint32_t)(a) - (uint32_t)(b)) >= 0)
++ */
++#define tcp_rcvseqinit(tp) ((tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1)
++/* Both init macros are fully parenthesised so each stays one expression wherever it is expanded; tp is evaluated several times. */
++#define tcp_sendseqinit(tp) \
++    ((tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = (tp)->iss)
++
++#define TCP_ISSINCR (125 * 1024) /* increment for tcp_iss each second; tcp_input.c also adds TCP_ISSINCR / 2 when it picks an iss */
++
++#endif
+diff --git a/slirp/src/tcp_input.c b/slirp/src/tcp_input.c
+new file mode 100644
+index 0000000000..36a4844a7d
+--- /dev/null
++++ b/slirp/src/tcp_input.c
+@@ -0,0 +1,1552 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
++ * The Regents of the University of California. All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * 2. Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in the
++ * documentation and/or other materials provided with the distribution.
++ * 3. Neither the name of the University nor the names of its contributors
++ * may be used to endorse or promote products derived from this software
++ * without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
++ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ *
++ * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94
++ * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp
++ */
++
++/*
++ * Changes and additions relating to SLiRP
++ * Copyright (c) 1995 Danny Gasparovski.
++ */
++
++#include "slirp.h"
++#include "ip_icmp.h"
++
++#define TCPREXMTTHRESH 3 /* duplicate-ACK count at which tcp_input() retransmits the segment at snd_una */
++
++#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) /* presumably 24 days in PR_SLOWHZ ticks -- TODO confirm */
++
++/* for modulo comparisons of timestamps: each tests the sign of (a) - (b) cast to int */
++#define TSTMP_LT(a, b) ((int)((a) - (b)) < 0)
++#define TSTMP_GEQ(a, b) ((int)((a) - (b)) >= 0)
++
++/*
++ * Insert segment ti into reassembly queue of tcp with
++ * control block tp. Return TH_FIN if reassembly now includes
++ * a segment with FIN. The macro form does the common case inline
++ * (segment is the next to be received on an established connection,
++ * and the queue is empty), avoiding linkage into and removal
++ * from the queue and repetition of various conversions.
++ * Set DELACK for segments received in order, but ack immediately
++ * when segments are out of order (so fast retransmit can work).
++ * Expands to a single statement (do/while(0)); arguments may be evaluated more than once. */
++#define TCP_REASS(tp, ti, m, so, flags) \
++    do { \
++        if ((ti)->ti_seq == (tp)->rcv_nxt && tcpfrag_list_empty(tp) && \
++            (tp)->t_state == TCPS_ESTABLISHED) { \
++            (tp)->t_flags |= TF_DELACK; \
++            (tp)->rcv_nxt += (ti)->ti_len; \
++            (flags) = (ti)->ti_flags & TH_FIN; \
++            if ((so)->so_emu) { \
++                if (tcp_emu((so), (m))) \
++                    sbappend((so), (m)); \
++            } else \
++                sbappend((so), (m)); \
++        } else { \
++            (flags) = tcp_reass((tp), (ti), (m)); \
++            (tp)->t_flags |= TF_ACKNOW; \
++        } \
++    } while (0)
++
++static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt,
++                          struct tcpiphdr *ti);
++static void tcp_xmit_timer(register struct tcpcb *tp, int rtt);
++/* tcp_reass: link ti (payload in m) into tp's reassembly queue, then pass in-order data to the socket; ti == NULL only flushes. Returns the last delivered segment's TH_FIN bit, else 0. */
++static int tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti,
++                     struct mbuf *m)
++{
++    if (m)
++        M_DUP_DEBUG(m->slirp, m, 0, 0);
++
++    register struct tcpiphdr *q; /* cursor into the reassembly queue */
++    struct socket *so = tp->t_socket;
++    int flags; /* TH_FIN bit of the most recently delivered segment */
++
++    /*
++     * Call with ti==NULL after become established to
++     * force pre-ESTABLISHED data up to user socket.
++     */
++    if (ti == NULL)
++        goto present;
++
++    /*
++     * Find a segment which begins after this one does.
++     */
++    for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp);
++         q = tcpiphdr_next(q))
++        if (SEQ_GT(q->ti_seq, ti->ti_seq))
++            break;
++
++    /*
++     * If there is a preceding segment, it may provide some of
++     * our data already. If so, drop the data from the incoming
++     * segment. If it provides all of our data, drop us.
++     */
++    if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) {
++        register int i;
++        q = tcpiphdr_prev(q);
++        /* conversion to int (in i) handles seq wraparound */
++        i = q->ti_seq + q->ti_len - ti->ti_seq;
++        if (i > 0) {
++            if (i >= ti->ti_len) {
++                m_free(m);
++                /*
++                 * Try to present any queued data
++                 * at the left window edge to the user.
++                 * This is needed after the 3-WHS
++                 * completes.
++                 */
++                goto present; /* ??? */
++            }
++            m_adj(m, i);
++            ti->ti_len -= i;
++            ti->ti_seq += i;
++        }
++        q = tcpiphdr_next(q);
++    }
++    ti->ti_mbuf = m;
++
++    /*
++     * While we overlap succeeding segments trim them or,
++     * if they are completely covered, dequeue them.
++     */
++    while (!tcpfrag_list_end(q, tp)) {
++        register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
++        if (i <= 0)
++            break;
++        if (i < q->ti_len) {
++            q->ti_seq += i;
++            q->ti_len -= i;
++            m_adj(q->ti_mbuf, i);
++            break;
++        }
++        q = tcpiphdr_next(q);
++        m = tcpiphdr_prev(q)->ti_mbuf;
++        remque(tcpiphdr2qlink(tcpiphdr_prev(q)));
++        m_free(m);
++    }
++
++    /*
++     * Stick new segment in its place.
++     */
++    insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q)));
++
++present:
++    /*
++     * Present data to user, advancing rcv_nxt through
++     * completed sequence space.
++     */
++    if (!TCPS_HAVEESTABLISHED(tp->t_state))
++        return (0);
++    ti = tcpfrag_list_first(tp);
++    if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt)
++        return (0);
++    if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) /* NOTE(review): looks unreachable, SYN_RECEIVED already returned above */
++        return (0);
++    do { /* deliver in-order segments until the list ends or a gap appears */
++        tp->rcv_nxt += ti->ti_len;
++        flags = ti->ti_flags & TH_FIN;
++        remque(tcpiphdr2qlink(ti));
++        m = ti->ti_mbuf;
++        ti = tcpiphdr_next(ti);
++        if (so->so_state & SS_FCANTSENDMORE) /* socket flagged SS_FCANTSENDMORE: discard the payload */
++            m_free(m);
++        else {
++            if (so->so_emu) {
++                if (tcp_emu(so, m)) /* append only when tcp_emu() returns non-zero */
++                    sbappend(so, m);
++            } else
++                sbappend(so, m);
++        }
++    } while (!tcpfrag_list_end(ti, tp) && ti->ti_seq == tp->rcv_nxt);
++    return (flags);
++}
++
++/*
++ * TCP input routine, follows pages 65-76 of the
++ * protocol specification dated September, 1981 very closely.
++ */ ++void tcp_input(struct mbuf *m, int iphlen, struct socket *inso, ++ unsigned short af) ++{ ++ struct ip save_ip, *ip; ++ struct ip6 save_ip6, *ip6; ++ register struct tcpiphdr *ti; ++ char *optp = NULL; ++ int optlen = 0; ++ int len, tlen, off; ++ register struct tcpcb *tp = NULL; ++ register int tiflags; ++ struct socket *so = NULL; ++ int todrop, acked, ourfinisacked, needoutput = 0; ++ int iss = 0; ++ uint32_t tiwin; ++ int ret; ++ struct sockaddr_storage lhost, fhost; ++ struct sockaddr_in *lhost4, *fhost4; ++ struct sockaddr_in6 *lhost6, *fhost6; ++ struct gfwd_list *ex_ptr; ++ Slirp *slirp; ++ ++ DEBUG_CALL("tcp_input"); ++ DEBUG_ARG("m = %p iphlen = %2d inso = %p", m, iphlen, inso); ++ ++ /* ++ * If called with m == 0, then we're continuing the connect ++ */ ++ if (m == NULL) { ++ so = inso; ++ slirp = so->slirp; ++ ++ /* Re-set a few variables */ ++ tp = sototcpcb(so); ++ m = so->so_m; ++ so->so_m = NULL; ++ ti = so->so_ti; ++ tiwin = ti->ti_win; ++ tiflags = ti->ti_flags; ++ ++ goto cont_conn; ++ } ++ slirp = m->slirp; ++ switch (af) { ++ case AF_INET: ++ M_DUP_DEBUG(slirp, m, 0, ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr)); ++ break; ++ case AF_INET6: ++ M_DUP_DEBUG(slirp, m, 0, ++ sizeof(struct tcpiphdr) - sizeof(struct ip6) - sizeof(struct tcphdr)); ++ break; ++ } ++ ++ ip = mtod(m, struct ip *); ++ ip6 = mtod(m, struct ip6 *); ++ ++ switch (af) { ++ case AF_INET: ++ if (iphlen > sizeof(struct ip)) { ++ ip_stripoptions(m, (struct mbuf *)0); ++ iphlen = sizeof(struct ip); ++ } ++ /* XXX Check if too short */ ++ ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ save_ip.ip_len += iphlen; ++ ++ /* ++ * Get IP and TCP header together in first mbuf. ++ * Note: IP leaves IP header in first mbuf. 
++ */ ++ m->m_data -= ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); ++ m->m_len += ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); ++ ti = mtod(m, struct tcpiphdr *); ++ ++ /* ++ * Checksum extended TCP header and data. ++ */ ++ tlen = ip->ip_len; ++ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; ++ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ memset(&ti->ti, 0, sizeof(ti->ti)); ++ ti->ti_x0 = 0; ++ ti->ti_src = save_ip.ip_src; ++ ti->ti_dst = save_ip.ip_dst; ++ ti->ti_pr = save_ip.ip_p; ++ ti->ti_len = htons((uint16_t)tlen); ++ break; ++ ++ case AF_INET6: ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip6 = *ip6; ++ /* ++ * Get IP and TCP header together in first mbuf. ++ * Note: IP leaves IP header in first mbuf. ++ */ ++ m->m_data -= sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ m->m_len += sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ ti = mtod(m, struct tcpiphdr *); ++ ++ tlen = ip6->ip_pl; ++ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; ++ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ memset(&ti->ti, 0, sizeof(ti->ti)); ++ ti->ti_x0 = 0; ++ ti->ti_src6 = save_ip6.ip_src; ++ ti->ti_dst6 = save_ip6.ip_dst; ++ ti->ti_nh6 = save_ip6.ip_nh; ++ ti->ti_len = htons((uint16_t)tlen); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ len = ((sizeof(struct tcpiphdr) - sizeof(struct tcphdr)) + tlen); ++ if (cksum(m, len)) { ++ goto drop; ++ } ++ ++ /* ++ * Check that TCP offset makes sense, ++ * pull out TCP options and adjust length. 
XXX ++ */ ++ off = ti->ti_off << 2; ++ if (off < sizeof(struct tcphdr) || off > tlen) { ++ goto drop; ++ } ++ tlen -= off; ++ ti->ti_len = tlen; ++ if (off > sizeof(struct tcphdr)) { ++ optlen = off - sizeof(struct tcphdr); ++ optp = mtod(m, char *) + sizeof(struct tcpiphdr); ++ } ++ tiflags = ti->ti_flags; ++ ++ /* ++ * Convert TCP protocol specific fields to host format. ++ */ ++ NTOHL(ti->ti_seq); ++ NTOHL(ti->ti_ack); ++ NTOHS(ti->ti_win); ++ NTOHS(ti->ti_urp); ++ ++ /* ++ * Drop TCP, IP headers and TCP options. ++ */ ++ m->m_data += sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ m->m_len -= sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ ++ /* ++ * Locate pcb for segment. ++ */ ++findso: ++ lhost.ss_family = af; ++ fhost.ss_family = af; ++ switch (af) { ++ case AF_INET: ++ lhost4 = (struct sockaddr_in *)&lhost; ++ lhost4->sin_addr = ti->ti_src; ++ lhost4->sin_port = ti->ti_sport; ++ fhost4 = (struct sockaddr_in *)&fhost; ++ fhost4->sin_addr = ti->ti_dst; ++ fhost4->sin_port = ti->ti_dport; ++ break; ++ case AF_INET6: ++ lhost6 = (struct sockaddr_in6 *)&lhost; ++ lhost6->sin6_addr = ti->ti_src6; ++ lhost6->sin6_port = ti->ti_sport; ++ fhost6 = (struct sockaddr_in6 *)&fhost; ++ fhost6->sin6_addr = ti->ti_dst6; ++ fhost6->sin6_port = ti->ti_dport; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ so = solookup(&slirp->tcp_last_so, &slirp->tcb, &lhost, &fhost); ++ ++ /* ++ * If the state is CLOSED (i.e., TCB does not exist) then ++ * all data in the incoming segment is discarded. ++ * If the TCB exists but is in CLOSED state, it is embryonic, ++ * but should either do a listen or a connect soon. ++ * ++ * state == CLOSED means we've done socreate() but haven't ++ * attached it to a protocol yet... ++ * ++ * XXX If a TCB does not exist, and the TH_SYN flag is ++ * the only flag set, then create a session, mark it ++ * as if it was LISTENING, and continue... 
++ */ ++ if (so == NULL) { ++ /* TODO: IPv6 */ ++ if (slirp->restricted) { ++ /* Any hostfwds will have an existing socket, so we only get here ++ * for non-hostfwd connections. These should be dropped, unless it ++ * happens to be a guestfwd. ++ */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == ti->ti_dport && ++ ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) { ++ break; ++ } ++ } ++ if (!ex_ptr) { ++ goto dropwithreset; ++ } ++ } ++ ++ if ((tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) != TH_SYN) ++ goto dropwithreset; ++ ++ so = socreate(slirp); ++ tcp_attach(so); ++ ++ sbreserve(&so->so_snd, TCP_SNDSPACE); ++ sbreserve(&so->so_rcv, TCP_RCVSPACE); ++ ++ so->lhost.ss = lhost; ++ so->fhost.ss = fhost; ++ ++ so->so_iptos = tcp_tos(so); ++ if (so->so_iptos == 0) { ++ switch (af) { ++ case AF_INET: ++ so->so_iptos = ((struct ip *)ti)->ip_tos; ++ break; ++ case AF_INET6: ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++ ++ tp = sototcpcb(so); ++ tp->t_state = TCPS_LISTEN; ++ } ++ ++ /* ++ * If this is a still-connecting socket, this probably ++ * a retransmit of the SYN. Whether it's a retransmit SYN ++ * or something else, we nuke it. ++ */ ++ if (so->so_state & SS_ISFCONNECTING) ++ goto drop; ++ ++ tp = sototcpcb(so); ++ ++ /* XXX Should never fail */ ++ if (tp == NULL) ++ goto dropwithreset; ++ if (tp->t_state == TCPS_CLOSED) ++ goto drop; ++ ++ tiwin = ti->ti_win; ++ ++ /* ++ * Segment received on connection. ++ * Reset idle time and keep-alive timer. ++ */ ++ tp->t_idle = 0; ++ if (slirp_do_keepalive) ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; ++ else ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; ++ ++ /* ++ * Process options if not in LISTEN state, ++ * else do it below (after getting remote address). 
++ */ ++ if (optp && tp->t_state != TCPS_LISTEN) ++ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); ++ ++ /* ++ * Header prediction: check for the two common cases ++ * of a uni-directional data xfer. If the packet has ++ * no control flags, is in-sequence, the window didn't ++ * change and we're not retransmitting, it's a ++ * candidate. If the length is zero and the ack moved ++ * forward, we're the sender side of the xfer. Just ++ * free the data acked & wake any higher level process ++ * that was blocked waiting for space. If the length ++ * is non-zero and the ack didn't move, we're the ++ * receiver side. If we're getting packets in-order ++ * (the reassembly queue is empty), add the data to ++ * the socket buffer and note that we need a delayed ack. ++ * ++ * XXX Some of these tests are not needed ++ * eg: the tiwin == tp->snd_wnd prevents many more ++ * predictions.. with no *real* advantage.. ++ */ ++ if (tp->t_state == TCPS_ESTABLISHED && ++ (tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK && ++ ti->ti_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd && ++ tp->snd_nxt == tp->snd_max) { ++ if (ti->ti_len == 0) { ++ if (SEQ_GT(ti->ti_ack, tp->snd_una) && ++ SEQ_LEQ(ti->ti_ack, tp->snd_max) && ++ tp->snd_cwnd >= tp->snd_wnd) { ++ /* ++ * this is a pure ack for outstanding data. ++ */ ++ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ acked = ti->ti_ack - tp->snd_una; ++ sodrop(so, acked); ++ tp->snd_una = ti->ti_ack; ++ m_free(m); ++ ++ /* ++ * If all outstanding data are acked, stop ++ * retransmit timer, otherwise restart timer ++ * using current (possibly backed-off) value. ++ * If process is waiting for space, ++ * wakeup/selwakeup/signal. If data ++ * are ready to send, let tcp_output ++ * decide between more output or persist. 
++ */ ++ if (tp->snd_una == tp->snd_max) ++ tp->t_timer[TCPT_REXMT] = 0; ++ else if (tp->t_timer[TCPT_PERSIST] == 0) ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ ++ /* ++ * This is called because sowwakeup might have ++ * put data into so_snd. Since we don't so sowwakeup, ++ * we don't need this.. XXX??? ++ */ ++ if (so->so_snd.sb_cc) ++ tcp_output(tp); ++ ++ return; ++ } ++ } else if (ti->ti_ack == tp->snd_una && tcpfrag_list_empty(tp) && ++ ti->ti_len <= sbspace(&so->so_rcv)) { ++ /* ++ * this is a pure, in-sequence data packet ++ * with nothing on the reassembly queue and ++ * we have enough buffer space to take it. ++ */ ++ tp->rcv_nxt += ti->ti_len; ++ /* ++ * Add data to socket buffer. ++ */ ++ if (so->so_emu) { ++ if (tcp_emu(so, m)) ++ sbappend(so, m); ++ } else ++ sbappend(so, m); ++ ++ /* ++ * If this is a short packet, then ACK now - with Nagel ++ * congestion avoidance sender won't send more until ++ * he gets an ACK. ++ * ++ * It is better to not delay acks at all to maximize ++ * TCP throughput. See RFC 2581. ++ */ ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ return; ++ } ++ } /* header prediction */ ++ /* ++ * Calculate amount of space in receive window, ++ * and then do TCP input processing. ++ * Receive window is amount of space in rcv queue, ++ * but not less than advertised window. ++ */ ++ { ++ int win; ++ win = sbspace(&so->so_rcv); ++ if (win < 0) ++ win = 0; ++ tp->rcv_wnd = MAX(win, (int)(tp->rcv_adv - tp->rcv_nxt)); ++ } ++ ++ switch (tp->t_state) { ++ /* ++ * If the state is LISTEN then ignore segment if it contains an RST. ++ * If the segment contains an ACK then it is bad and send a RST. ++ * If it does not contain a SYN then it is not interesting; drop it. ++ * Don't bother responding if the destination was a broadcast. ++ * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial ++ * tp->iss, and send a segment: ++ * ++ * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. 
++ * Fill in remote peer address fields if not previously specified. ++ * Enter SYN_RECEIVED state, and process any other fields of this ++ * segment in this state. ++ */ ++ case TCPS_LISTEN: { ++ if (tiflags & TH_RST) ++ goto drop; ++ if (tiflags & TH_ACK) ++ goto dropwithreset; ++ if ((tiflags & TH_SYN) == 0) ++ goto drop; ++ ++ /* ++ * This has way too many gotos... ++ * But a bit of spaghetti code never hurt anybody :) ++ */ ++ ++ /* ++ * If this is destined for the control address, then flag to ++ * tcp_ctl once connected, otherwise connect ++ */ ++ /* TODO: IPv6 */ ++ if (af == AF_INET && ++ (so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr && ++ so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) { ++ /* May be an add exec */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == so->so_fport && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { ++ so->so_state |= SS_CTL; ++ break; ++ } ++ } ++ if (so->so_state & SS_CTL) { ++ goto cont_input; ++ } ++ } ++ /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */ ++ } ++ ++ if (so->so_emu & EMU_NOCONNECT) { ++ so->so_emu &= ~EMU_NOCONNECT; ++ goto cont_input; ++ } ++ ++ if ((tcp_fconnect(so, so->so_ffamily) == -1) && (errno != EAGAIN) && ++ (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) { ++ uint8_t code; ++ DEBUG_MISC(" tcp fconnect errno = %d-%s", errno, strerror(errno)); ++ if (errno == ECONNREFUSED) { ++ /* ACK the SYN, send RST to refuse the connection */ ++ tcp_respond(tp, ti, m, ti->ti_seq + 1, (tcp_seq)0, ++ TH_RST | TH_ACK, af); ++ } else { ++ switch (af) { ++ case AF_INET: ++ code = ICMP_UNREACH_NET; ++ if (errno == EHOSTUNREACH) { ++ code = ICMP_UNREACH_HOST; ++ } ++ break; ++ case AF_INET6: ++ code = ICMP6_UNREACH_NO_ROUTE; ++ if (errno == EHOSTUNREACH) { ++ code = ICMP6_UNREACH_ADDRESS; ++ } ++ break; ++ default: ++ g_assert_not_reached(); ++ 
} ++ HTONL(ti->ti_seq); /* restore tcp header */ ++ HTONL(ti->ti_ack); ++ HTONS(ti->ti_win); ++ HTONS(ti->ti_urp); ++ m->m_data -= ++ sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ m->m_len += ++ sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ switch (af) { ++ case AF_INET: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct ip) - ++ sizeof(struct tcphdr); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct ip) - ++ sizeof(struct tcphdr); ++ *ip = save_ip; ++ icmp_send_error(m, ICMP_UNREACH, code, 0, strerror(errno)); ++ break; ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ m->m_len -= sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ *ip6 = save_ip6; ++ icmp6_send_error(m, ICMP6_UNREACH, code); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++ tcp_close(tp); ++ m_free(m); ++ } else { ++ /* ++ * Haven't connected yet, save the current mbuf ++ * and ti, and return ++ * XXX Some OS's don't tell us whether the connect() ++ * succeeded or not. So we must time it out. ++ */ ++ so->so_m = m; ++ so->so_ti = ti; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ tp->t_state = TCPS_SYN_RECEIVED; ++ /* ++ * Initialize receive sequence numbers now so that we can send a ++ * valid RST if the remote end rejects our connection. 
++ */ ++ tp->irs = ti->ti_seq; ++ tcp_rcvseqinit(tp); ++ tcp_template(tp); ++ } ++ return; ++ ++ cont_conn: ++ /* m==NULL ++ * Check if the connect succeeded ++ */ ++ if (so->so_state & SS_NOFDREF) { ++ tp = tcp_close(tp); ++ goto dropwithreset; ++ } ++ cont_input: ++ tcp_template(tp); ++ ++ if (optp) ++ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); ++ ++ if (iss) ++ tp->iss = iss; ++ else ++ tp->iss = slirp->tcp_iss; ++ slirp->tcp_iss += TCP_ISSINCR / 2; ++ tp->irs = ti->ti_seq; ++ tcp_sendseqinit(tp); ++ tcp_rcvseqinit(tp); ++ tp->t_flags |= TF_ACKNOW; ++ tp->t_state = TCPS_SYN_RECEIVED; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ goto trimthenstep6; ++ } /* case TCPS_LISTEN */ ++ ++ /* ++ * If the state is SYN_SENT: ++ * if seg contains an ACK, but not for our SYN, drop the input. ++ * if seg contains a RST, then drop the connection. ++ * if seg does not contain SYN, then drop it. ++ * Otherwise this is an acceptable SYN segment ++ * initialize tp->rcv_nxt and tp->irs ++ * if seg contains ack then advance tp->snd_una ++ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state ++ * arrange for segment to be acked (eventually) ++ * continue processing rest of data/controls, beginning with URG ++ */ ++ case TCPS_SYN_SENT: ++ if ((tiflags & TH_ACK) && ++ (SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max))) ++ goto dropwithreset; ++ ++ if (tiflags & TH_RST) { ++ if (tiflags & TH_ACK) { ++ tcp_drop(tp, 0); /* XXX Check t_softerror! 
*/ ++ } ++ goto drop; ++ } ++ ++ if ((tiflags & TH_SYN) == 0) ++ goto drop; ++ if (tiflags & TH_ACK) { ++ tp->snd_una = ti->ti_ack; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ++ tp->snd_nxt = tp->snd_una; ++ } ++ ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->irs = ti->ti_seq; ++ tcp_rcvseqinit(tp); ++ tp->t_flags |= TF_ACKNOW; ++ if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { ++ soisfconnected(so); ++ tp->t_state = TCPS_ESTABLISHED; ++ ++ tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); ++ /* ++ * if we didn't have to retransmit the SYN, ++ * use its rtt as our initial srtt & rtt var. ++ */ ++ if (tp->t_rtt) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ } else ++ tp->t_state = TCPS_SYN_RECEIVED; ++ ++ trimthenstep6: ++ /* ++ * Advance ti->ti_seq to correspond to first data byte. ++ * If data, trim to stay within window, ++ * dropping FIN if necessary. ++ */ ++ ti->ti_seq++; ++ if (ti->ti_len > tp->rcv_wnd) { ++ todrop = ti->ti_len - tp->rcv_wnd; ++ m_adj(m, -todrop); ++ ti->ti_len = tp->rcv_wnd; ++ tiflags &= ~TH_FIN; ++ } ++ tp->snd_wl1 = ti->ti_seq - 1; ++ tp->rcv_up = ti->ti_seq; ++ goto step6; ++ } /* switch tp->t_state */ ++ /* ++ * States other than LISTEN or SYN_SENT. ++ * Check that at least some bytes of segment are within ++ * receive window. If segment begins before rcv_nxt, ++ * drop leading data (and SYN); if nothing left, just ack. ++ */ ++ todrop = tp->rcv_nxt - ti->ti_seq; ++ if (todrop > 0) { ++ if (tiflags & TH_SYN) { ++ tiflags &= ~TH_SYN; ++ ti->ti_seq++; ++ if (ti->ti_urp > 1) ++ ti->ti_urp--; ++ else ++ tiflags &= ~TH_URG; ++ todrop--; ++ } ++ /* ++ * Following if statement from Stevens, vol. 2, p. 960. ++ */ ++ if (todrop > ti->ti_len || ++ (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) { ++ /* ++ * Any valid FIN must be to the left of the window. ++ * At this point the FIN must be a duplicate or out ++ * of sequence; drop it. ++ */ ++ tiflags &= ~TH_FIN; ++ ++ /* ++ * Send an ACK to resynchronize and drop any data. 
++ * But keep on processing for RST or ACK. ++ */ ++ tp->t_flags |= TF_ACKNOW; ++ todrop = ti->ti_len; ++ } ++ m_adj(m, todrop); ++ ti->ti_seq += todrop; ++ ti->ti_len -= todrop; ++ if (ti->ti_urp > todrop) ++ ti->ti_urp -= todrop; ++ else { ++ tiflags &= ~TH_URG; ++ ti->ti_urp = 0; ++ } ++ } ++ /* ++ * If new data are received on a connection after the ++ * user processes are gone, then RST the other end. ++ */ ++ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && ++ ti->ti_len) { ++ tp = tcp_close(tp); ++ goto dropwithreset; ++ } ++ ++ /* ++ * If segment ends after window, drop trailing data ++ * (and PUSH and FIN); if nothing left, just ACK. ++ */ ++ todrop = (ti->ti_seq + ti->ti_len) - (tp->rcv_nxt + tp->rcv_wnd); ++ if (todrop > 0) { ++ if (todrop >= ti->ti_len) { ++ /* ++ * If a new connection request is received ++ * while in TIME_WAIT, drop the old connection ++ * and start over if the sequence numbers ++ * are above the previous ones. ++ */ ++ if (tiflags & TH_SYN && tp->t_state == TCPS_TIME_WAIT && ++ SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { ++ iss = tp->rcv_nxt + TCP_ISSINCR; ++ tp = tcp_close(tp); ++ goto findso; ++ } ++ /* ++ * If window is closed can only take segments at ++ * window edge, and have to drop data and PUSH from ++ * incoming segments. Continue processing, but ++ * remember to ack. Otherwise, drop segment ++ * and ack. ++ */ ++ if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { ++ tp->t_flags |= TF_ACKNOW; ++ } else { ++ goto dropafterack; ++ } ++ } ++ m_adj(m, -todrop); ++ ti->ti_len -= todrop; ++ tiflags &= ~(TH_PUSH | TH_FIN); ++ } ++ ++ /* ++ * If the RST bit is set examine the state: ++ * SYN_RECEIVED STATE: ++ * If passive open, return to LISTEN state. ++ * If active open, inform user that connection was refused. ++ * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: ++ * Inform user that connection was reset, and close tcb. ++ * CLOSING, LAST_ACK, TIME_WAIT STATES ++ * Close the tcb. 
++ */ ++ if (tiflags & TH_RST) ++ switch (tp->t_state) { ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ case TCPS_FIN_WAIT_1: ++ case TCPS_FIN_WAIT_2: ++ case TCPS_CLOSE_WAIT: ++ tp->t_state = TCPS_CLOSED; ++ tcp_close(tp); ++ goto drop; ++ ++ case TCPS_CLOSING: ++ case TCPS_LAST_ACK: ++ case TCPS_TIME_WAIT: ++ tcp_close(tp); ++ goto drop; ++ } ++ ++ /* ++ * If a SYN is in the window, then this is an ++ * error and we send an RST and drop the connection. ++ */ ++ if (tiflags & TH_SYN) { ++ tp = tcp_drop(tp, 0); ++ goto dropwithreset; ++ } ++ ++ /* ++ * If the ACK bit is off we drop the segment and return. ++ */ ++ if ((tiflags & TH_ACK) == 0) ++ goto drop; ++ ++ /* ++ * Ack processing. ++ */ ++ switch (tp->t_state) { ++ /* ++ * In SYN_RECEIVED state if the ack ACKs our SYN then enter ++ * ESTABLISHED state and continue processing, otherwise ++ * send an RST. una<=ack<=max ++ */ ++ case TCPS_SYN_RECEIVED: ++ ++ if (SEQ_GT(tp->snd_una, ti->ti_ack) || SEQ_GT(ti->ti_ack, tp->snd_max)) ++ goto dropwithreset; ++ tp->t_state = TCPS_ESTABLISHED; ++ /* ++ * The sent SYN is ack'ed with our sequence number +1 ++ * The first data byte already in the buffer will get ++ * lost if no correction is made. This is only needed for ++ * SS_CTL since the buffer is empty otherwise. ++ * tp->snd_una++; or: ++ */ ++ tp->snd_una = ti->ti_ack; ++ if (so->so_state & SS_CTL) { ++ /* So tcp_ctl reports the right state */ ++ ret = tcp_ctl(so); ++ if (ret == 1) { ++ soisfconnected(so); ++ so->so_state &= ~SS_CTL; /* success XXX */ ++ } else if (ret == 2) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* CTL_CMD */ ++ } else { ++ needoutput = 1; ++ tp->t_state = TCPS_FIN_WAIT_1; ++ } ++ } else { ++ soisfconnected(so); ++ } ++ ++ tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); ++ tp->snd_wl1 = ti->ti_seq - 1; ++ /* Avoid ack processing; snd_una==ti_ack => dup ack */ ++ goto synrx_to_est; ++ /* fall into ... 
*/ ++ ++ /* ++ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range ++ * ACKs. If the ack is in the range ++ * tp->snd_una < ti->ti_ack <= tp->snd_max ++ * then advance tp->snd_una to ti->ti_ack and drop ++ * data from the retransmission queue. If this ACK reflects ++ * more up to date window information we update our window information. ++ */ ++ case TCPS_ESTABLISHED: ++ case TCPS_FIN_WAIT_1: ++ case TCPS_FIN_WAIT_2: ++ case TCPS_CLOSE_WAIT: ++ case TCPS_CLOSING: ++ case TCPS_LAST_ACK: ++ case TCPS_TIME_WAIT: ++ ++ if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { ++ if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { ++ DEBUG_MISC(" dup ack m = %p so = %p", m, so); ++ /* ++ * If we have outstanding data (other than ++ * a window probe), this is a completely ++ * duplicate ack (ie, window info didn't ++ * change), the ack is the biggest we've ++ * seen and we've seen exactly our rexmt ++ * threshold of them, assume a packet ++ * has been dropped and retransmit it. ++ * Kludge snd_nxt & the congestion ++ * window so we send only this one ++ * packet. ++ * ++ * We know we're losing at the current ++ * window size so do congestion avoidance ++ * (set ssthresh to half the current window ++ * and pull our congestion window back to ++ * the new ssthresh). ++ * ++ * Dup acks mean that packets have left the ++ * network (they're now cached at the receiver) ++ * so bump cwnd by the amount in the receiver ++ * to keep a constant cwnd packets in the ++ * network. 
++ */ ++ if (tp->t_timer[TCPT_REXMT] == 0 || ti->ti_ack != tp->snd_una) ++ tp->t_dupacks = 0; ++ else if (++tp->t_dupacks == TCPREXMTTHRESH) { ++ tcp_seq onxt = tp->snd_nxt; ++ unsigned win = ++ MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; ++ ++ if (win < 2) ++ win = 2; ++ tp->snd_ssthresh = win * tp->t_maxseg; ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->t_rtt = 0; ++ tp->snd_nxt = ti->ti_ack; ++ tp->snd_cwnd = tp->t_maxseg; ++ tcp_output(tp); ++ tp->snd_cwnd = ++ tp->snd_ssthresh + tp->t_maxseg * tp->t_dupacks; ++ if (SEQ_GT(onxt, tp->snd_nxt)) ++ tp->snd_nxt = onxt; ++ goto drop; ++ } else if (tp->t_dupacks > TCPREXMTTHRESH) { ++ tp->snd_cwnd += tp->t_maxseg; ++ tcp_output(tp); ++ goto drop; ++ } ++ } else ++ tp->t_dupacks = 0; ++ break; ++ } ++ synrx_to_est: ++ /* ++ * If the congestion window was inflated to account ++ * for the other side's cached packets, retract it. ++ */ ++ if (tp->t_dupacks > TCPREXMTTHRESH && tp->snd_cwnd > tp->snd_ssthresh) ++ tp->snd_cwnd = tp->snd_ssthresh; ++ tp->t_dupacks = 0; ++ if (SEQ_GT(ti->ti_ack, tp->snd_max)) { ++ goto dropafterack; ++ } ++ acked = ti->ti_ack - tp->snd_una; ++ ++ /* ++ * If transmit timer is running and timed sequence ++ * number was acked, update smoothed round trip time. ++ * Since we now have an rtt measurement, cancel the ++ * timer backoff (cf., Phil Karn's retransmit alg.). ++ * Recompute the initial retransmit timer. ++ */ ++ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ ++ /* ++ * If all outstanding data is acked, stop retransmit ++ * timer and remember to restart (more output or persist). ++ * If there is more data to be acked, restart retransmit ++ * timer, using current (possibly backed-off) value. ++ */ ++ if (ti->ti_ack == tp->snd_max) { ++ tp->t_timer[TCPT_REXMT] = 0; ++ needoutput = 1; ++ } else if (tp->t_timer[TCPT_PERSIST] == 0) ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ /* ++ * When new data is acked, open the congestion window. 
++ * If the window gives us less than ssthresh packets ++ * in flight, open exponentially (maxseg per packet). ++ * Otherwise open linearly: maxseg per window ++ * (maxseg^2 / cwnd per packet). ++ */ ++ { ++ register unsigned cw = tp->snd_cwnd; ++ register unsigned incr = tp->t_maxseg; ++ ++ if (cw > tp->snd_ssthresh) ++ incr = incr * incr / cw; ++ tp->snd_cwnd = MIN(cw + incr, TCP_MAXWIN << tp->snd_scale); ++ } ++ if (acked > so->so_snd.sb_cc) { ++ tp->snd_wnd -= so->so_snd.sb_cc; ++ sodrop(so, (int)so->so_snd.sb_cc); ++ ourfinisacked = 1; ++ } else { ++ sodrop(so, acked); ++ tp->snd_wnd -= acked; ++ ourfinisacked = 0; ++ } ++ tp->snd_una = ti->ti_ack; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ++ tp->snd_nxt = tp->snd_una; ++ ++ switch (tp->t_state) { ++ /* ++ * In FIN_WAIT_1 STATE in addition to the processing ++ * for the ESTABLISHED state if our FIN is now acknowledged ++ * then enter FIN_WAIT_2. ++ */ ++ case TCPS_FIN_WAIT_1: ++ if (ourfinisacked) { ++ /* ++ * If we can't receive any more ++ * data, then closing user can proceed. ++ * Starting the timer is contrary to the ++ * specification, but if we don't get a FIN ++ * we'll hang forever. ++ */ ++ if (so->so_state & SS_FCANTRCVMORE) { ++ tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE; ++ } ++ tp->t_state = TCPS_FIN_WAIT_2; ++ } ++ break; ++ ++ /* ++ * In CLOSING STATE in addition to the processing for ++ * the ESTABLISHED state if the ACK acknowledges our FIN ++ * then enter the TIME-WAIT state, otherwise ignore ++ * the segment. ++ */ ++ case TCPS_CLOSING: ++ if (ourfinisacked) { ++ tp->t_state = TCPS_TIME_WAIT; ++ tcp_canceltimers(tp); ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ } ++ break; ++ ++ /* ++ * In LAST_ACK, we may still be waiting for data to drain ++ * and/or to be acked, as well as for the ack of our FIN. ++ * If our FIN is now acknowledged, delete the TCB, ++ * enter the closed state and return. 
++ */ ++ case TCPS_LAST_ACK: ++ if (ourfinisacked) { ++ tcp_close(tp); ++ goto drop; ++ } ++ break; ++ ++ /* ++ * In TIME_WAIT state the only thing that should arrive ++ * is a retransmission of the remote FIN. Acknowledge ++ * it and restart the finack timer. ++ */ ++ case TCPS_TIME_WAIT: ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ goto dropafterack; ++ } ++ } /* switch(tp->t_state) */ ++ ++step6: ++ /* ++ * Update window information. ++ * Don't look at window if no ACK: TAC's send garbage on first SYN. ++ */ ++ if ((tiflags & TH_ACK) && ++ (SEQ_LT(tp->snd_wl1, ti->ti_seq) || ++ (tp->snd_wl1 == ti->ti_seq && ++ (SEQ_LT(tp->snd_wl2, ti->ti_ack) || ++ (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { ++ tp->snd_wnd = tiwin; ++ tp->snd_wl1 = ti->ti_seq; ++ tp->snd_wl2 = ti->ti_ack; ++ if (tp->snd_wnd > tp->max_sndwnd) ++ tp->max_sndwnd = tp->snd_wnd; ++ needoutput = 1; ++ } ++ ++ /* ++ * Process segments with URG. ++ */ ++ if ((tiflags & TH_URG) && ti->ti_urp && ++ TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ /* ++ * This is a kludge, but if we receive and accept ++ * random urgent pointers, we'll crash in ++ * soreceive. It's hard to imagine someone ++ * actually wanting to send this much urgent data. ++ */ ++ if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) { ++ ti->ti_urp = 0; ++ tiflags &= ~TH_URG; ++ goto dodata; ++ } ++ /* ++ * If this segment advances the known urgent pointer, ++ * then mark the data stream. This should not happen ++ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since ++ * a FIN has been received from the remote side. ++ * In these states we ignore the URG. ++ * ++ * According to RFC961 (Assigned Protocols), ++ * the urgent pointer points to the last octet ++ * of urgent data. We continue, however, ++ * to consider it to indicate the first octet ++ * of data past the urgent section as the original ++ * spec states (in one of two places). 
++ */ ++ if (SEQ_GT(ti->ti_seq + ti->ti_urp, tp->rcv_up)) { ++ tp->rcv_up = ti->ti_seq + ti->ti_urp; ++ so->so_urgc = ++ so->so_rcv.sb_cc + (tp->rcv_up - tp->rcv_nxt); /* -1; */ ++ tp->rcv_up = ti->ti_seq + ti->ti_urp; ++ } ++ } else ++ /* ++ * If no out of band data is expected, ++ * pull receive urgent pointer along ++ * with the receive window. ++ */ ++ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) ++ tp->rcv_up = tp->rcv_nxt; ++dodata: ++ ++ /* ++ * If this is a small packet, then ACK now - with Nagel ++ * congestion avoidance sender won't send more until ++ * he gets an ACK. ++ */ ++ if (ti->ti_len && (unsigned)ti->ti_len <= 5 && ++ ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { ++ tp->t_flags |= TF_ACKNOW; ++ } ++ ++ /* ++ * Process the segment text, merging it into the TCP sequencing queue, ++ * and arranging for acknowledgment of receipt if necessary. ++ * This process logically involves adjusting tp->rcv_wnd as data ++ * is presented to the user (this happens in tcp_usrreq.c, ++ * case PRU_RCVD). If a FIN has already been received on this ++ * connection then we just ignore the text. ++ */ ++ if ((ti->ti_len || (tiflags & TH_FIN)) && ++ TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ TCP_REASS(tp, ti, m, so, tiflags); ++ } else { ++ m_free(m); ++ tiflags &= ~TH_FIN; ++ } ++ ++ /* ++ * If FIN is received ACK the FIN and let the user know ++ * that the connection is closing. ++ */ ++ if (tiflags & TH_FIN) { ++ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ /* ++ * If we receive a FIN we can't send more data, ++ * set it SS_FDRAIN ++ * Shutdown the socket if there is no rx data in the ++ * buffer. ++ * soread() is called on completion of shutdown() and ++ * will got to TCPS_LAST_ACK, and use tcp_output() ++ * to send the FIN. ++ */ ++ sofwdrain(so); ++ ++ tp->t_flags |= TF_ACKNOW; ++ tp->rcv_nxt++; ++ } ++ switch (tp->t_state) { ++ /* ++ * In SYN_RECEIVED and ESTABLISHED STATES ++ * enter the CLOSE_WAIT state. 
++ */ ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ if (so->so_emu == EMU_CTL) /* no shutdown on socket */ ++ tp->t_state = TCPS_LAST_ACK; ++ else ++ tp->t_state = TCPS_CLOSE_WAIT; ++ break; ++ ++ /* ++ * If still in FIN_WAIT_1 STATE FIN has not been acked so ++ * enter the CLOSING state. ++ */ ++ case TCPS_FIN_WAIT_1: ++ tp->t_state = TCPS_CLOSING; ++ break; ++ ++ /* ++ * In FIN_WAIT_2 state enter the TIME_WAIT state, ++ * starting the time-wait timer, turning off the other ++ * standard timers. ++ */ ++ case TCPS_FIN_WAIT_2: ++ tp->t_state = TCPS_TIME_WAIT; ++ tcp_canceltimers(tp); ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ break; ++ ++ /* ++ * In TIME_WAIT state restart the 2 MSL time_wait timer. ++ */ ++ case TCPS_TIME_WAIT: ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ break; ++ } ++ } ++ ++ /* ++ * Return any desired output. ++ */ ++ if (needoutput || (tp->t_flags & TF_ACKNOW)) { ++ tcp_output(tp); ++ } ++ return; ++ ++dropafterack: ++ /* ++ * Generate an ACK dropping incoming segment if it occupies ++ * sequence space, where the ACK reflects our state. ++ */ ++ if (tiflags & TH_RST) ++ goto drop; ++ m_free(m); ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ return; ++ ++dropwithreset: ++ /* reuses m if m!=NULL, m_free() unnecessary */ ++ if (tiflags & TH_ACK) ++ tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST, af); ++ else { ++ if (tiflags & TH_SYN) ++ ti->ti_len++; ++ tcp_respond(tp, ti, m, ti->ti_seq + ti->ti_len, (tcp_seq)0, ++ TH_RST | TH_ACK, af); ++ } ++ ++ return; ++ ++drop: ++ /* ++ * Drop space held by incoming segment and return. 
++ */ ++ m_free(m); ++} ++ ++static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, ++ struct tcpiphdr *ti) ++{ /* Walk the cnt option bytes at cp; only the MSS option of a SYN segment is acted on. */ ++ uint16_t mss; ++ int opt, optlen; ++ ++ DEBUG_CALL("tcp_dooptions"); ++ DEBUG_ARG("tp = %p cnt=%i", tp, cnt); ++ ++ for (; cnt > 0; cnt -= optlen, cp += optlen) { ++ opt = cp[0]; ++ if (opt == TCPOPT_EOL) ++ break; ++ if (opt == TCPOPT_NOP) ++ optlen = 1; ++ else { ++ optlen = (cnt > 1) ? cp[1] : 0; /* only read the length byte if it lies inside the option area */ ++ if (optlen < 2 || optlen > cnt) /* malformed or truncated option: stop parsing */ ++ break; ++ } ++ switch (opt) { ++ default: ++ continue; ++ ++ case TCPOPT_MAXSEG: ++ if (optlen != TCPOLEN_MAXSEG) ++ continue; ++ if (!(ti->ti_flags & TH_SYN)) ++ continue; ++ memcpy((char *)&mss, (char *)cp + 2, sizeof(mss)); ++ NTOHS(mss); ++ tcp_mss(tp, mss); /* sets t_maxseg */ ++ break; ++ } ++ } ++} ++ ++/* ++ * Collect new round-trip time estimate ++ * and update averages and current timeout. ++ */ ++ ++static void tcp_xmit_timer(register struct tcpcb *tp, int rtt) ++{ ++ register short delta; ++ ++ DEBUG_CALL("tcp_xmit_timer"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("rtt = %d", rtt); ++ ++ if (tp->t_srtt != 0) { ++ /* ++ * srtt is stored as fixed point with 3 bits after the ++ * binary point (i.e., scaled by 8). The following magic ++ * is equivalent to the smoothing algorithm in rfc793 with ++ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed ++ * point). Adjust rtt to origin 0. ++ */ ++ delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); ++ if ((tp->t_srtt += delta) <= 0) ++ tp->t_srtt = 1; ++ /* ++ * We accumulate a smoothed rtt variance (actually, a ++ * smoothed mean difference), then set the retransmit ++ * timer to smoothed rtt + 4 times the smoothed variance. ++ * rttvar is stored as fixed point with 2 bits after the ++ * binary point (scaled by 4). The following is ++ * equivalent to rfc793 smoothing with an alpha of .75 ++ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces ++ * rfc793's wired-in beta.
++ */ ++ if (delta < 0) ++ delta = -delta; ++ delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); ++ if ((tp->t_rttvar += delta) <= 0) ++ tp->t_rttvar = 1; ++ } else { ++ /* ++ * No rtt measurement yet - use the unsmoothed rtt. ++ * Set the variance to half the rtt (so our first ++ * retransmit happens at 3*rtt). ++ */ ++ tp->t_srtt = rtt << TCP_RTT_SHIFT; ++ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); ++ } ++ tp->t_rtt = 0; ++ tp->t_rxtshift = 0; ++ ++ /* ++ * the retransmit should happen at rtt + 4 * rttvar. ++ * Because of the way we do the smoothing, srtt and rttvar ++ * will each average +1/2 tick of bias. When we compute ++ * the retransmit timer, we want 1/2 tick of rounding and ++ * 1 extra tick because of +-1/2 tick uncertainty in the ++ * firing of the timer. The bias will give us exactly the ++ * 1.5 tick we need. But, because the bias is ++ * statistical, we have to test that we don't drop below ++ * the minimum feasible timer (which is 2 ticks). ++ */ ++ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), (short)tp->t_rttmin, ++ TCPTV_REXMTMAX); /* XXX */ ++ ++ /* ++ * We received an ack for a packet that wasn't retransmitted; ++ * it is probably safe to discard any error indications we've ++ * received recently. This isn't quite right, but close enough ++ * for now (a route might have failed after we sent a segment, ++ * and the return path might not be symmetrical). ++ */ ++ tp->t_softerror = 0; ++} ++ ++/* ++ * Determine a reasonable value for maxseg size. ++ * If the route is known, check route for mtu. ++ * If none, use an mss that can be handled on the outgoing ++ * interface without forcing IP to fragment; if bigger than ++ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES ++ * to utilize large mbufs. 
If no route is found, route has no mtu, ++ * or the destination isn't local, use a default, hopefully conservative ++ * size (usually 512 or the default IP max size, but no more than the mtu ++ * of the interface), as we can't discover anything about intervening ++ * gateways or networks. We also initialize the congestion/slow start ++ * window to be a single segment if the destination isn't local. ++ * While looking at the routing entry, we also initialize other path-dependent ++ * parameters from pre-set or cached values in the routing entry. ++ */ ++ ++int tcp_mss(struct tcpcb *tp, unsigned offer) ++{ /* MSS = min(if_mtu, if_mru) minus TCP/IP headers, capped by a non-zero peer offer, floor 32; also sets snd_cwnd and resizes both socket buffers. */ ++ struct socket *so = tp->t_socket; ++ int mss; ++ ++ DEBUG_CALL("tcp_mss"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("offer = %u", offer); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - ++ sizeof(struct tcphdr) - sizeof(struct ip); ++ break; ++ case AF_INET6: ++ mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - ++ sizeof(struct tcphdr) - sizeof(struct ip6); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (offer) ++ mss = MIN(mss, offer); ++ mss = MAX(mss, 32); ++ if (mss < tp->t_maxseg || offer != 0) ++ tp->t_maxseg = MIN(mss, TCP_MAXSEG_MAX); ++ ++ tp->snd_cwnd = mss; ++ ++ sbreserve(&so->so_snd, ++ TCP_SNDSPACE + ++ ((TCP_SNDSPACE % mss) ? (mss - (TCP_SNDSPACE % mss)) : 0)); ++ sbreserve(&so->so_rcv, ++ TCP_RCVSPACE + ++ ((TCP_RCVSPACE % mss) ? (mss - (TCP_RCVSPACE % mss)) : 0)); ++ ++ DEBUG_MISC(" returning mss = %d", mss); ++ ++ return mss; ++} +diff --git a/slirp/src/tcp_output.c b/slirp/src/tcp_output.c +new file mode 100644 +index 0000000000..383fe31dcf +--- /dev/null ++++ b/slirp/src/tcp_output.c +@@ -0,0 +1,516 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved.
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 ++ * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++ ++static const uint8_t tcp_outflags[TCP_NSTATES] = { ++ TH_RST | TH_ACK, 0, TH_SYN, TH_SYN | TH_ACK, ++ TH_ACK, TH_ACK, TH_FIN | TH_ACK, TH_FIN | TH_ACK, ++ TH_FIN | TH_ACK, TH_ACK, TH_ACK, ++}; ++ ++ ++#undef MAX_TCPOPTLEN ++#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ ++ ++/* ++ * Tcp output routine: figure out what should be sent and send it. ++ */ ++int tcp_output(struct tcpcb *tp) ++{ ++ register struct socket *so = tp->t_socket; ++ register long len, win; ++ int off, flags, error; ++ register struct mbuf *m; ++ register struct tcpiphdr *ti, tcpiph_save; ++ struct ip *ip; ++ struct ip6 *ip6; ++ uint8_t opt[MAX_TCPOPTLEN]; ++ unsigned optlen, hdrlen; ++ int idle, sendalot; ++ ++ DEBUG_CALL("tcp_output"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ /* ++ * Determine length of data that should be transmitted, ++ * and flags that will be used. ++ * If there is some data or critical controls (SYN, RST) ++ * to send, then transmit; otherwise, investigate further. ++ */ ++ idle = (tp->snd_max == tp->snd_una); ++ if (idle && tp->t_idle >= tp->t_rxtcur) ++ /* ++ * We have been idle for "a while" and no acks are ++ * expected to clock out any data we send -- ++ * slow start to get ack "clock" running again. ++ */ ++ tp->snd_cwnd = tp->t_maxseg; ++again: ++ sendalot = 0; ++ off = tp->snd_nxt - tp->snd_una; ++ win = MIN(tp->snd_wnd, tp->snd_cwnd); ++ ++ flags = tcp_outflags[tp->t_state]; ++ ++ DEBUG_MISC(" --- tcp_output flags = 0x%x", flags); ++ ++ /* ++ * If in persist timeout with window of 0, send 1 byte. ++ * Otherwise, if window is small but nonzero ++ * and timer expired, we will send what we can ++ * and go to transmit state. ++ */ ++ if (tp->t_force) { ++ if (win == 0) { ++ /* ++ * If we still have some data to send, then ++ * clear the FIN bit. Usually this would ++ * happen below when it realizes that we ++ * aren't sending all the data. 
However, ++ * if we have exactly 1 byte of unsent data, ++ * then it won't clear the FIN bit below, ++ * and if we are in persist state, we wind ++ * up sending the packet without recording ++ * that we sent the FIN bit. ++ * ++ * We can't just blindly clear the FIN bit, ++ * because if we don't have any more data ++ * to send then the probe will be the FIN ++ * itself. ++ */ ++ if (off < so->so_snd.sb_cc) ++ flags &= ~TH_FIN; ++ win = 1; ++ } else { ++ tp->t_timer[TCPT_PERSIST] = 0; ++ tp->t_rxtshift = 0; ++ } ++ } ++ ++ len = MIN(so->so_snd.sb_cc, win) - off; ++ ++ if (len < 0) { ++ /* ++ * If FIN has been sent but not acked, ++ * but we haven't been called to retransmit, ++ * len will be -1. Otherwise, window shrank ++ * after we sent into it. If window shrank to 0, ++ * cancel pending retransmit and pull snd_nxt ++ * back to (closed) window. We will enter persist ++ * state below. If the window didn't close completely, ++ * just wait for an ACK. ++ */ ++ len = 0; ++ if (win == 0) { ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->snd_nxt = tp->snd_una; ++ } ++ } ++ ++ if (len > tp->t_maxseg) { ++ len = tp->t_maxseg; ++ sendalot = 1; ++ } ++ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) ++ flags &= ~TH_FIN; ++ ++ win = sbspace(&so->so_rcv); ++ ++ /* ++ * Sender silly window avoidance. If connection is idle ++ * and can send all data, a maximum segment, ++ * at least a maximum default-size segment do it, ++ * or are forced, do it; otherwise don't bother. ++ * If peer's buffer is tiny, then send ++ * when window is at least half open. ++ * If retransmitting (possibly after persist timer forced us ++ * to send into a small window), then must resend.
++ */ ++ if (len) { ++ if (len == tp->t_maxseg) ++ goto send; ++ if ((1 || idle || tp->t_flags & TF_NODELAY) && ++ len + off >= so->so_snd.sb_cc) ++ goto send; ++ if (tp->t_force) ++ goto send; ++ if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) ++ goto send; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_max)) ++ goto send; ++ } ++ ++ /* ++ * Compare available window to amount of window ++ * known to peer (as advertised window less ++ * next expected input). If the difference is at least two ++ * max size segments, or at least 50% of the maximum possible ++ * window, then want to send a window update to peer. ++ */ ++ if (win > 0) { ++ /* ++ * "adv" is the amount we can increase the window, ++ * taking into account that we are limited by ++ * TCP_MAXWIN << tp->rcv_scale. ++ */ ++ long adv = MIN(win, (long)TCP_MAXWIN << tp->rcv_scale) - ++ (tp->rcv_adv - tp->rcv_nxt); ++ ++ if (adv >= (long)(2 * tp->t_maxseg)) ++ goto send; ++ if (2 * adv >= (long)so->so_rcv.sb_datalen) ++ goto send; ++ } ++ ++ /* ++ * Send if we owe peer an ACK. ++ */ ++ if (tp->t_flags & TF_ACKNOW) ++ goto send; ++ if (flags & (TH_SYN | TH_RST)) ++ goto send; ++ if (SEQ_GT(tp->snd_up, tp->snd_una)) ++ goto send; ++ /* ++ * If our state indicates that FIN should be sent ++ * and we have not yet done so, or we're retransmitting the FIN, ++ * then we need to send. ++ */ ++ if (flags & TH_FIN && ++ ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) ++ goto send; ++ ++ /* ++ * TCP window updates are not reliable, rather a polling protocol ++ * using ``persist'' packets is used to insure receipt of window ++ * updates. The three ``states'' for the output side are: ++ * idle not doing retransmits or persists ++ * persisting to move a small or zero window ++ * (re)transmitting and thereby not persisting ++ * ++ * tp->t_timer[TCPT_PERSIST] ++ * is set when we are in persist state. ++ * tp->t_force ++ * is set when we are called to send a persist packet. 
++ * tp->t_timer[TCPT_REXMT] ++ * is set when we are retransmitting ++ * The output side is idle when both timers are zero. ++ * ++ * If send window is too small, there is data to transmit, and no ++ * retransmit or persist is pending, then go to persist state. ++ * If nothing happens soon, send when timer expires: ++ * if window is nonzero, transmit what we can, ++ * otherwise force out a byte. ++ */ ++ if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && ++ tp->t_timer[TCPT_PERSIST] == 0) { ++ tp->t_rxtshift = 0; ++ tcp_setpersist(tp); ++ } ++ ++ /* ++ * No reason to send a segment, just return. ++ */ ++ return (0); ++ ++send: ++ /* ++ * Before ESTABLISHED, force sending of initial options ++ * unless TCP set not to do any options. ++ * NOTE: we assume that the IP/TCP header plus TCP options ++ * always fit in a single mbuf, leaving room for a maximum ++ * link header, i.e. ++ * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN ++ */ ++ optlen = 0; ++ hdrlen = sizeof(struct tcpiphdr); ++ if (flags & TH_SYN) { ++ tp->snd_nxt = tp->iss; ++ if ((tp->t_flags & TF_NOOPT) == 0) { ++ uint16_t mss; ++ ++ opt[0] = TCPOPT_MAXSEG; ++ opt[1] = 4; ++ mss = htons((uint16_t)tcp_mss(tp, 0)); ++ memcpy((char *)(opt + 2), (char *)&mss, sizeof(mss)); ++ optlen = 4; ++ } ++ } ++ ++ hdrlen += optlen; ++ ++ /* ++ * Adjust data length if insertion of options will ++ * bump the packet length beyond the t_maxseg length. ++ */ ++ if (len > tp->t_maxseg - optlen) { ++ len = tp->t_maxseg - optlen; ++ sendalot = 1; ++ } ++ ++ /* ++ * Grab a header mbuf, attaching a copy of data to ++ * be transmitted, and initialize the header from ++ * the template for sends on this connection. ++ */ ++ if (len) { ++ m = m_get(so->slirp); ++ if (m == NULL) { ++ error = 1; ++ goto out; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m->m_len = hdrlen; ++ ++ sbcopy(&so->so_snd, off, (int)len, mtod(m, char *) + hdrlen); ++ m->m_len += len; ++ ++ /* ++ * If we're sending everything we've got, set PUSH. 
++ * (This will keep happy those implementations which only ++ * give data to the user when a buffer fills or ++ * a PUSH comes in.) ++ */ ++ if (off + len == so->so_snd.sb_cc) ++ flags |= TH_PUSH; ++ } else { ++ m = m_get(so->slirp); ++ if (m == NULL) { ++ error = 1; ++ goto out; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m->m_len = hdrlen; ++ } ++ ++ ti = mtod(m, struct tcpiphdr *); ++ ++ memcpy((char *)ti, &tp->t_template, sizeof(struct tcpiphdr)); ++ ++ /* ++ * Fill in fields, remembering maximum advertised ++ * window for use in delaying messages about window sizes. ++ * If resending a FIN, be sure not to use a new sequence number. ++ */ ++ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && ++ tp->snd_nxt == tp->snd_max) ++ tp->snd_nxt--; ++ /* ++ * If we are doing retransmissions, then snd_nxt will ++ * not reflect the first unsent octet. For ACK only ++ * packets, we do not want the sequence number of the ++ * retransmitted packet, we want the sequence number ++ * of the next unsent octet. So, if there is no data ++ * (and no SYN or FIN), use snd_max instead of snd_nxt ++ * when filling in ti_seq. But if we are in persist ++ * state, snd_max might reflect one byte beyond the ++ * right edge of the window, so use snd_nxt in that ++ * case, since we know we aren't doing a retransmission. ++ * (retransmit and persist are mutually exclusive...) ++ */ ++ if (len || (flags & (TH_SYN | TH_FIN)) || tp->t_timer[TCPT_PERSIST]) ++ ti->ti_seq = htonl(tp->snd_nxt); ++ else ++ ti->ti_seq = htonl(tp->snd_max); ++ ti->ti_ack = htonl(tp->rcv_nxt); ++ if (optlen) { ++ memcpy((char *)(ti + 1), (char *)opt, optlen); ++ ti->ti_off = (sizeof(struct tcphdr) + optlen) >> 2; ++ } ++ ti->ti_flags = flags; ++ /* ++ * Calculate receive window. Don't shrink window, ++ * but avoid silly window syndrome. 
++ */ ++ if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg) ++ win = 0; ++ if (win > (long)TCP_MAXWIN << tp->rcv_scale) ++ win = (long)TCP_MAXWIN << tp->rcv_scale; ++ if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) ++ win = (long)(tp->rcv_adv - tp->rcv_nxt); ++ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); ++ ++ if (SEQ_GT(tp->snd_up, tp->snd_una)) { ++ ti->ti_urp = htons((uint16_t)(tp->snd_up - ntohl(ti->ti_seq))); ++ ti->ti_flags |= TH_URG; ++ } else ++ /* ++ * If no urgent pointer to send, then we pull ++ * the urgent pointer to the left edge of the send window ++ * so that it doesn't drift into the send window on sequence ++ * number wraparound. ++ */ ++ tp->snd_up = tp->snd_una; /* drag it along */ ++ ++ /* ++ * Put TCP length in extended header, and then ++ * checksum extended header and data. ++ */ ++ if (len + optlen) ++ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + optlen + len)); ++ ti->ti_sum = cksum(m, (int)(hdrlen + len)); ++ ++ /* ++ * In transmit state, time the transmission and arrange for ++ * the retransmit. In persist state, just set snd_max. ++ */ ++ if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { ++ tcp_seq startseq = tp->snd_nxt; ++ ++ /* ++ * Advance snd_nxt over sequence space of this segment. ++ */ ++ if (flags & (TH_SYN | TH_FIN)) { ++ if (flags & TH_SYN) ++ tp->snd_nxt++; ++ if (flags & TH_FIN) { ++ tp->snd_nxt++; ++ tp->t_flags |= TF_SENTFIN; ++ } ++ } ++ tp->snd_nxt += len; ++ if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { ++ tp->snd_max = tp->snd_nxt; ++ /* ++ * Time this transmission if not a retransmission and ++ * not currently timing anything. ++ */ ++ if (tp->t_rtt == 0) { ++ tp->t_rtt = 1; ++ tp->t_rtseq = startseq; ++ } ++ } ++ ++ /* ++ * Set retransmit timer if not currently set, ++ * and not doing an ack or a keep-alive probe. ++ * Initial value for retransmit timer is smoothed ++ * round-trip time + 2 * round-trip time variance. 
++ * Initialize shift counter which is used for backoff ++ * of retransmit time. ++ */ ++ if (tp->t_timer[TCPT_REXMT] == 0 && tp->snd_nxt != tp->snd_una) { ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ if (tp->t_timer[TCPT_PERSIST]) { ++ tp->t_timer[TCPT_PERSIST] = 0; ++ tp->t_rxtshift = 0; ++ } ++ } ++ } else if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) ++ tp->snd_max = tp->snd_nxt + len; ++ ++ /* ++ * Fill in IP length and desired time to live and ++ * send to IP level. There should be a better way ++ * to handle ttl and tos; we could keep them in ++ * the template, but need a way to checksum without them. ++ */ ++ m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */ ++ tcpiph_save = *mtod(m, struct tcpiphdr *); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ m->m_data += ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ m->m_len -= ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ ip = mtod(m, struct ip *); ++ ++ ip->ip_len = m->m_len; ++ ip->ip_dst = tcpiph_save.ti_dst; ++ ip->ip_src = tcpiph_save.ti_src; ++ ip->ip_p = tcpiph_save.ti_pr; ++ ++ ip->ip_ttl = IPDEFTTL; ++ ip->ip_tos = so->so_iptos; ++ error = ip_output(so, m); ++ break; ++ ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ ip6 = mtod(m, struct ip6 *); ++ ++ ip6->ip_pl = tcpiph_save.ti_len; ++ ip6->ip_dst = tcpiph_save.ti_dst6; ++ ip6->ip_src = tcpiph_save.ti_src6; ++ ip6->ip_nh = tcpiph_save.ti_nh6; ++ ++ error = ip6_output(so, m, 0); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (error) { ++ out: ++ return (error); ++ } ++ ++ /* ++ * Data sent (as far as we can tell). ++ * If this advertises a larger window than any other segment, ++ * then remember the size of the advertised window. ++ * Any pending ACK has now been sent. 
++ */ ++ if (win > 0 && SEQ_GT(tp->rcv_nxt + win, tp->rcv_adv)) ++ tp->rcv_adv = tp->rcv_nxt + win; ++ tp->last_ack_sent = tp->rcv_nxt; ++ tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); ++ if (sendalot) ++ goto again; ++ ++ return (0); ++} ++ ++void tcp_setpersist(struct tcpcb *tp) ++{ ++ int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; ++ ++ /* ++ * Start/restart persistence timer. ++ */ ++ TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], t * tcp_backoff[tp->t_rxtshift], ++ TCPTV_PERSMIN, TCPTV_PERSMAX); ++ if (tp->t_rxtshift < TCP_MAXRXTSHIFT) ++ tp->t_rxtshift++; ++} +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +new file mode 100644 +index 0000000000..600cfa1456 +--- /dev/null ++++ b/slirp/src/tcp_subr.c +@@ -0,0 +1,1011 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93 ++ * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++/* patchable/settable parameters for tcp */ ++/* Don't do rfc1323 performance enhancements */ ++#define TCP_DO_RFC1323 0 ++ ++/* ++ * Tcp initialization ++ */ ++void tcp_init(Slirp *slirp) ++{ ++ slirp->tcp_iss = 1; /* wrong */ ++ slirp->tcb.so_next = slirp->tcb.so_prev = &slirp->tcb; ++ slirp->tcp_last_so = &slirp->tcb; ++} ++ ++void tcp_cleanup(Slirp *slirp) ++{ ++ while (slirp->tcb.so_next != &slirp->tcb) { ++ tcp_close(sototcpcb(slirp->tcb.so_next)); ++ } ++} ++ ++/* ++ * Create template to be used to send tcp packets on a connection. ++ * Call after host entry created, fills ++ * in a skeletal tcp/ip header, minimizing the amount of work ++ * necessary when the connection is used. 
++ */ ++void tcp_template(struct tcpcb *tp) ++{ ++ struct socket *so = tp->t_socket; ++ register struct tcpiphdr *n = &tp->t_template; ++ ++ n->ti_mbuf = NULL; ++ memset(&n->ti, 0, sizeof(n->ti)); ++ n->ti_x0 = 0; ++ switch (so->so_ffamily) { ++ case AF_INET: ++ n->ti_pr = IPPROTO_TCP; ++ n->ti_len = htons(sizeof(struct tcphdr)); ++ n->ti_src = so->so_faddr; ++ n->ti_dst = so->so_laddr; ++ n->ti_sport = so->so_fport; ++ n->ti_dport = so->so_lport; ++ break; ++ ++ case AF_INET6: ++ n->ti_nh6 = IPPROTO_TCP; ++ n->ti_len = htons(sizeof(struct tcphdr)); ++ n->ti_src6 = so->so_faddr6; ++ n->ti_dst6 = so->so_laddr6; ++ n->ti_sport = so->so_fport6; ++ n->ti_dport = so->so_lport6; ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ n->ti_seq = 0; ++ n->ti_ack = 0; ++ n->ti_x2 = 0; ++ n->ti_off = 5; ++ n->ti_flags = 0; ++ n->ti_win = 0; ++ n->ti_sum = 0; ++ n->ti_urp = 0; ++} ++ ++/* ++ * Send a single message to the TCP at address specified by ++ * the given TCP/IP header. If m == 0, then we make a copy ++ * of the tcpiphdr at ti and send directly to the addressed host. ++ * This is used to force keep alive messages out using the TCP ++ * template for a connection tp->t_template. If flags are given ++ * then we send a message back to the TCP which originated the ++ * segment ti, and discard the mbuf containing it and any other ++ * attached mbufs. ++ * ++ * In any case the ack and sequence number of the transmitted ++ * segment are as specified by the parameters. 
++ */ ++void tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, ++ tcp_seq ack, tcp_seq seq, int flags, unsigned short af) ++{ ++ register int tlen; ++ int win = 0; ++ ++ DEBUG_CALL("tcp_respond"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("ti = %p", ti); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("ack = %u", ack); ++ DEBUG_ARG("seq = %u", seq); ++ DEBUG_ARG("flags = %x", flags); ++ ++ if (tp) ++ win = sbspace(&tp->t_socket->so_rcv); ++ if (m == NULL) { ++ if (!tp || (m = m_get(tp->t_socket->slirp)) == NULL) ++ return; ++ tlen = 0; ++ m->m_data += IF_MAXLINKHDR; ++ *mtod(m, struct tcpiphdr *) = *ti; ++ ti = mtod(m, struct tcpiphdr *); ++ switch (af) { ++ case AF_INET: ++ ti->ti.ti_i4.ih_x1 = 0; ++ break; ++ case AF_INET6: ++ ti->ti.ti_i6.ih_x1 = 0; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ flags = TH_ACK; ++ } else { ++ /* ++ * ti points into m so the next line is just making ++ * the mbuf point to ti ++ */ ++ m->m_data = (char *)ti; ++ ++ m->m_len = sizeof(struct tcpiphdr); ++ tlen = 0; ++#define xchg(a, b, type) \ ++ { \ ++ type t; \ ++ t = a; \ ++ a = b; \ ++ b = t; \ ++ } ++ switch (af) { ++ case AF_INET: ++ xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, uint32_t); ++ xchg(ti->ti_dport, ti->ti_sport, uint16_t); ++ break; ++ case AF_INET6: ++ xchg(ti->ti_dst6, ti->ti_src6, struct in6_addr); ++ xchg(ti->ti_dport, ti->ti_sport, uint16_t); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++#undef xchg ++ } ++ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + tlen)); ++ tlen += sizeof(struct tcpiphdr); ++ m->m_len = tlen; ++ ++ ti->ti_mbuf = NULL; ++ ti->ti_x0 = 0; ++ ti->ti_seq = htonl(seq); ++ ti->ti_ack = htonl(ack); ++ ti->ti_x2 = 0; ++ ti->ti_off = sizeof(struct tcphdr) >> 2; ++ ti->ti_flags = flags; ++ if (tp) ++ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); ++ else ++ ti->ti_win = htons((uint16_t)win); ++ ti->ti_urp = 0; ++ ti->ti_sum = 0; ++ ti->ti_sum = cksum(m, tlen); ++ ++ struct tcpiphdr tcpiph_save = *(mtod(m, 
struct tcpiphdr *)); ++ struct ip *ip; ++ struct ip6 *ip6; ++ ++ switch (af) { ++ case AF_INET: ++ m->m_data += ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ m->m_len -= ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ ip = mtod(m, struct ip *); ++ ip->ip_len = m->m_len; ++ ip->ip_dst = tcpiph_save.ti_dst; ++ ip->ip_src = tcpiph_save.ti_src; ++ ip->ip_p = tcpiph_save.ti_pr; ++ ++ if (flags & TH_RST) { ++ ip->ip_ttl = MAXTTL; ++ } else { ++ ip->ip_ttl = IPDEFTTL; ++ } ++ ++ ip_output(NULL, m); ++ break; ++ ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ ip6 = mtod(m, struct ip6 *); ++ ip6->ip_pl = tcpiph_save.ti_len; ++ ip6->ip_dst = tcpiph_save.ti_dst6; ++ ip6->ip_src = tcpiph_save.ti_src6; ++ ip6->ip_nh = tcpiph_save.ti_nh6; ++ ++ ip6_output(NULL, m, 0); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++} ++ ++/* ++ * Create a new TCP control block, making an ++ * empty reassembly queue and hooking it to the argument ++ * protocol control block. ++ */ ++struct tcpcb *tcp_newtcpcb(struct socket *so) ++{ ++ register struct tcpcb *tp; ++ ++ tp = g_new0(struct tcpcb, 1); ++ tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; ++ /* ++ * 40: length of IPv4 header (20) + TCP header (20) ++ * 60: length of IPv6 header (40) + TCP header (20) ++ */ ++ tp->t_maxseg = ++ MIN(so->slirp->if_mtu - ((so->so_ffamily == AF_INET) ? 40 : 60), ++ TCP_MAXSEG_MAX); ++ ++ tp->t_flags = TCP_DO_RFC1323 ? (TF_REQ_SCALE | TF_REQ_TSTMP) : 0; ++ tp->t_socket = so; ++ ++ /* ++ * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no ++ * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives ++ * reasonable initial retransmit time. 
++ */ ++ tp->t_srtt = TCPTV_SRTTBASE; ++ tp->t_rttvar = TCPTV_SRTTDFLT << 2; ++ tp->t_rttmin = TCPTV_MIN; ++ ++ TCPT_RANGESET(tp->t_rxtcur, ++ ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, ++ TCPTV_MIN, TCPTV_REXMTMAX); ++ ++ tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; ++ tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; ++ tp->t_state = TCPS_CLOSED; ++ ++ so->so_tcpcb = tp; ++ ++ return (tp); ++} ++ ++/* ++ * Drop a TCP connection, reporting ++ * the specified error. If connection is synchronized, ++ * then send a RST to peer. ++ */ ++struct tcpcb *tcp_drop(struct tcpcb *tp, int err) ++{ ++ DEBUG_CALL("tcp_drop"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("errno = %d", errno); ++ ++ if (TCPS_HAVERCVDSYN(tp->t_state)) { ++ tp->t_state = TCPS_CLOSED; ++ tcp_output(tp); ++ } ++ return (tcp_close(tp)); ++} ++ ++/* ++ * Close a TCP control block: ++ * discard all space held by the tcp ++ * discard internet protocol block ++ * wake up any sleepers ++ */ ++struct tcpcb *tcp_close(struct tcpcb *tp) ++{ ++ register struct tcpiphdr *t; ++ struct socket *so = tp->t_socket; ++ Slirp *slirp = so->slirp; ++ register struct mbuf *m; ++ ++ DEBUG_CALL("tcp_close"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ /* free the reassembly queue, if any */ ++ t = tcpfrag_list_first(tp); ++ while (!tcpfrag_list_end(t, tp)) { ++ t = tcpiphdr_next(t); ++ m = tcpiphdr_prev(t)->ti_mbuf; ++ remque(tcpiphdr2qlink(tcpiphdr_prev(t))); ++ m_free(m); ++ } ++ g_free(tp); ++ so->so_tcpcb = NULL; ++ /* clobber input socket cache if we're closing the cached connection */ ++ if (so == slirp->tcp_last_so) ++ slirp->tcp_last_so = &slirp->tcb; ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sbfree(&so->so_rcv); ++ sbfree(&so->so_snd); ++ sofree(so); ++ return ((struct tcpcb *)0); ++} ++ ++/* ++ * TCP protocol interface to socket abstraction. ++ */ ++ ++/* ++ * User issued close, and wish to trail through shutdown states: ++ * if never received SYN, just forget it. 
If got a SYN from peer, ++ * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. ++ * If already got a FIN from peer, then almost done; go to LAST_ACK ++ * state. In all other cases, have already sent FIN to peer (e.g. ++ * after PRU_SHUTDOWN), and just have to play tedious game waiting ++ * for peer to send FIN or not respond to keep-alives, etc. ++ * We can let the user exit from the close as soon as the FIN is acked. ++ */ ++void tcp_sockclosed(struct tcpcb *tp) ++{ ++ DEBUG_CALL("tcp_sockclosed"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ if (!tp) { ++ return; ++ } ++ ++ switch (tp->t_state) { ++ case TCPS_CLOSED: ++ case TCPS_LISTEN: ++ case TCPS_SYN_SENT: ++ tp->t_state = TCPS_CLOSED; ++ tcp_close(tp); ++ return; ++ ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ tp->t_state = TCPS_FIN_WAIT_1; ++ break; ++ ++ case TCPS_CLOSE_WAIT: ++ tp->t_state = TCPS_LAST_ACK; ++ break; ++ } ++ tcp_output(tp); ++} ++ ++/* ++ * Connect to a host on the Internet ++ * Called by tcp_input ++ * Only do a connect, the tcp fields will be set in tcp_input ++ * return 0 if there's a result of the connect, ++ * else return -1 means we're still connecting ++ * The return value is almost always -1 since the socket is ++ * nonblocking. Connect returns after the SYN is sent, and does ++ * not wait for ACK+SYN. 
++ */
++int tcp_fconnect(struct socket *so, unsigned short af)
++{
++    int ret = 0;
++
++    DEBUG_CALL("tcp_fconnect");
++    DEBUG_ARG("so = %p", so);
++
++    ret = so->s = slirp_socket(af, SOCK_STREAM, 0);
++    if (ret >= 0) {
++        ret = slirp_bind_outbound(so, af);
++        if (ret < 0) {
++            /* bind failed - close socket */
++            closesocket(so->s);
++            so->s = -1;
++            return (ret);
++        }
++    }
++
++    if (ret >= 0) {
++        int opt, s = so->s;
++        struct sockaddr_storage addr;
++
++        slirp_set_nonblock(s);
++        so->slirp->cb->register_poll_fd(s, so->slirp->opaque);
++        slirp_socket_set_fast_reuse(s);
++        opt = 1;
++        setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(opt));
++        opt = 1;
++        setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(opt));
++
++        addr = so->fhost.ss; /* work on a copy; sotranslate_out() may rewrite it */
++        DEBUG_CALL(" connect()ing");
++        if (sotranslate_out(so, &addr) < 0) {
++            return -1; /* NOTE(review): so->s stays open and registered here; confirm the caller cleans up */
++        }
++
++        /* We don't care what port we get */
++        ret = connect(s, (struct sockaddr *)&addr, sockaddr_size(&addr));
++
++        /*
++         * If it's not in progress, it failed, so we just return 0,
++         * without clearing SS_NOFDREF
++         */
++        soisfconnecting(so);
++    }
++
++    return (ret);
++}
++
++/*
++ * Accept the socket and connect to the local-host
++ *
++ * We have a problem. The correct thing to do would be
++ * to first connect to the local-host, and only if the
++ * connection is accepted, then do an accept() here.
++ * But, a) we need to know who's trying to connect
++ * to the socket to be able to SYN the local-host, and
++ * b) we are already connected to the foreign host by
++ * the time it gets to accept(), so... We simply accept
++ * here and SYN the local-host.
++ */
++void tcp_connect(struct socket *inso)
++{
++    Slirp *slirp = inso->slirp;
++    struct socket *so;
++    struct sockaddr_storage addr;
++    socklen_t addrlen = sizeof(struct sockaddr_storage);
++    struct tcpcb *tp;
++    int s, opt, ret;
++    /* AF_INET6 addresses are bigger than AF_INET, so this is big enough. */
++    char addrstr[INET6_ADDRSTRLEN];
++    char portstr[6];
++
++    DEBUG_CALL("tcp_connect");
++    DEBUG_ARG("inso = %p", inso);
++    ret = getnameinfo((const struct sockaddr *) &inso->lhost.ss, sizeof(inso->lhost.ss), addrstr, sizeof(addrstr), portstr, sizeof(portstr), NI_NUMERICHOST|NI_NUMERICSERV);
++    g_assert(ret == 0);
++    DEBUG_ARG("ip = [%s]:%s", addrstr, portstr);
++    DEBUG_ARG("so_state = 0x%x", inso->so_state);
++
++    /* Perform lazy guest IP address resolution if needed. */
++    if (inso->so_state & SS_HOSTFWD) {
++        /*
++         * We can only reject the connection request by accepting it and
++         * then immediately closing it. Note that SS_FACCEPTONCE sockets can't
++         * get here.
++         */
++        if (soassign_guest_addr_if_needed(inso) < 0) {
++            /*
++             * Guest address isn't available yet. We could either try to defer
++             * completing this connection request until the guest address is
++             * available, or punt. It's easier to punt. Otherwise we need to
++             * complicate the mechanism by which we're called to defer calling
++             * us again until the guest address is available.
++             */
++            DEBUG_MISC(" guest address not available yet");
++            s = accept(inso->s, (struct sockaddr *)&addr, &addrlen);
++            if (s >= 0) {
++                closesocket(s); /* same close primitive as every other socket in this file */
++            }
++            return;
++        }
++    }
++
++    /*
++     * If it's an SS_FACCEPTONCE socket, no need to socreate()
++     * another socket, just use the accept() socket.
++     */
++    if (inso->so_state & SS_FACCEPTONCE) {
++        /* FACCEPTONCE already have a tcpcb */
++        so = inso;
++    } else {
++        so = socreate(slirp);
++        tcp_attach(so);
++        so->lhost = inso->lhost;
++        so->so_ffamily = inso->so_ffamily;
++    }
++
++    tcp_mss(sototcpcb(so), 0);
++
++    s = accept(inso->s, (struct sockaddr *)&addr, &addrlen);
++    if (s < 0) {
++        tcp_close(sototcpcb(so)); /* This will sofree() as well */
++        return;
++    }
++    slirp_set_nonblock(s);
++    so->slirp->cb->register_poll_fd(s, so->slirp->opaque);
++    slirp_socket_set_fast_reuse(s);
++    opt = 1;
++    setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int));
++    slirp_socket_set_nodelay(s);
++
++    so->fhost.ss = addr; /* peer address as reported by accept() */
++    sotranslate_accept(so);
++
++    /* Close the accept() socket, set right state */
++    if (inso->so_state & SS_FACCEPTONCE) {
++        /* If we only accept once, close the accept() socket */
++        so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque);
++        closesocket(so->s);
++
++        /* Don't select it yet, even though we have an FD */
++        /* if it's not FACCEPTONCE, it's already NOFDREF */
++        so->so_state = SS_NOFDREF;
++    }
++    so->s = s; /* from here on the socket owns the accepted descriptor */
++    so->so_state |= SS_INCOMING;
++
++    so->so_iptos = tcp_tos(so);
++    tp = sototcpcb(so);
++
++    tcp_template(tp);
++
++    tp->t_state = TCPS_SYN_SENT;
++    tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
++    tp->iss = slirp->tcp_iss;
++    slirp->tcp_iss += TCP_ISSINCR / 2;
++    tcp_sendseqinit(tp);
++    tcp_output(tp);
++}
++
++/*
++ * Attach a TCPCB to a socket.
++ */ ++void tcp_attach(struct socket *so) ++{ ++ so->so_tcpcb = tcp_newtcpcb(so); ++ insque(so, &so->slirp->tcb); ++} ++ ++/* ++ * Set the socket's type of service field ++ */ ++static const struct tos_t tcptos[] = { ++ { 0, 20, IPTOS_THROUGHPUT, 0 }, /* ftp data */ ++ { 21, 21, IPTOS_LOWDELAY, EMU_FTP }, /* ftp control */ ++ { 0, 23, IPTOS_LOWDELAY, 0 }, /* telnet */ ++ { 0, 80, IPTOS_THROUGHPUT, 0 }, /* WWW */ ++ { 0, 513, IPTOS_LOWDELAY, EMU_RLOGIN | EMU_NOCONNECT }, /* rlogin */ ++ { 0, 544, IPTOS_LOWDELAY, EMU_KSH }, /* kshell */ ++ { 0, 543, IPTOS_LOWDELAY, 0 }, /* klogin */ ++ { 0, 6667, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC */ ++ { 0, 6668, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC undernet */ ++ { 0, 7070, IPTOS_LOWDELAY, EMU_REALAUDIO }, /* RealAudio control */ ++ { 0, 113, IPTOS_LOWDELAY, EMU_IDENT }, /* identd protocol */ ++ { 0, 0, 0, 0 } ++}; ++ ++/* ++ * Return TOS according to the above table ++ */ ++uint8_t tcp_tos(struct socket *so) ++{ ++ int i = 0; ++ ++ while (tcptos[i].tos) { ++ if ((tcptos[i].fport && (ntohs(so->so_fport) == tcptos[i].fport)) || ++ (tcptos[i].lport && (ntohs(so->so_lport) == tcptos[i].lport))) { ++ if (so->slirp->enable_emu) ++ so->so_emu = tcptos[i].emu; ++ return tcptos[i].tos; ++ } ++ i++; ++ } ++ return 0; ++} ++ ++/* ++ * Emulate programs that try and connect to us ++ * This includes ftp (the data connection is ++ * initiated by the server) and IRC (DCC CHAT and ++ * DCC SEND) for now ++ * ++ * NOTE: It's possible to crash SLiRP by sending it ++ * unstandard strings to emulate... if this is a problem, ++ * more checks are needed here ++ * ++ * XXX Assumes the whole command came in one packet ++ * XXX If there is more than one command in the packet, the others may ++ * be truncated. ++ * XXX If the command is too long, it may be truncated. ++ * ++ * XXX Some ftp clients will have their TOS set to ++ * LOWDELAY and so Nagel will kick in. 
Because of this,
++ * we'll get the first letter, followed by the rest, so
++ * we simply scan for ORT instead of PORT...
++ * DCC doesn't have this problem because there's other stuff
++ * in the packet before the DCC command.
++ *
++ * Return 1 if the mbuf m is still valid and should be
++ * sbappend()ed (every return statement below currently returns 1)
++ *
++ * NOTE: if you return 0 you MUST m_free() the mbuf!
++ */
++int tcp_emu(struct socket *so, struct mbuf *m)
++{
++    Slirp *slirp = so->slirp;
++    unsigned n1, n2, n3, n4, n5, n6;
++    char buff[257]; /* 256 chars from the %256 conversions plus the NUL */
++    uint32_t laddr;
++    unsigned lport;
++    char *bptr;
++
++    DEBUG_CALL("tcp_emu");
++    DEBUG_ARG("so = %p", so);
++    DEBUG_ARG("m = %p", m);
++
++    switch (so->so_emu) {
++        int x, i;
++
++    /* TODO: IPv6 */
++    case EMU_IDENT:
++        /*
++         * Identification protocol as per rfc-1413
++         */
++
++        {
++            struct socket *tmpso;
++            struct sockaddr_in addr;
++            socklen_t addrlen = sizeof(struct sockaddr_in);
++            char *eol = g_strstr_len(m->m_data, m->m_len, "\r\n");
++
++            if (!eol) {
++                return 1; /* no complete line yet: pass the data through untouched */
++            }
++
++            *eol = '\0'; /* temporarily terminate the line for sscanf */
++            if (sscanf(m->m_data, "%u%*[ ,]%u", &n1, &n2) == 2) {
++                HTONS(n1);
++                HTONS(n2);
++                /* n2 is the one on our host */
++                for (tmpso = slirp->tcb.so_next; tmpso != &slirp->tcb;
++                     tmpso = tmpso->so_next) {
++                    if (tmpso->so_laddr.s_addr == so->so_laddr.s_addr &&
++                        tmpso->so_lport == n2 &&
++                        tmpso->so_faddr.s_addr == so->so_faddr.s_addr &&
++                        tmpso->so_fport == n1) {
++                        if (getsockname(tmpso->s, (struct sockaddr *)&addr,
++                                        &addrlen) == 0)
++                            n2 = addr.sin_port; /* replace with the host-side port of that socket */
++                        break;
++                    }
++                }
++                NTOHS(n1);
++                NTOHS(n2);
++                m_inc(m, g_snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); /* size the mbuf for the rewritten line */
++                m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2);
++            } else {
++                *eol = '\r'; /* parse failed: restore the byte overwritten above */
++            }
++
++            return 1;
++        }
++
++    case EMU_FTP: /* ftp */
++        m_inc(m, m->m_len + 1);
++        *(m->m_data + m->m_len) = 0; /* NUL terminate for strstr */
++        if ((bptr = (char *)strstr(m->m_data, "ORT")) != NULL) {
++            /*
++             * Need to emulate the PORT command
++             */
++            x = sscanf(bptr, "ORT %u,%u,%u,%u,%u,%u\r\n%256[^\177]", &n1, &n2,
++                       &n3, &n4, &n5, &n6, buff);
++            if (x < 6)
++                return 1; /* not a full PORT argument list: leave the data alone */
++
++            laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4));
++            lport = htons((n5 << 8) | (n6));
++
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport,
++                                 SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            n6 = ntohs(so->so_fport); /* from here on so is the new listening socket */
++
++            n5 = (n6 >> 8) & 0xff;
++            n6 &= 0xff;
++
++            laddr = ntohl(so->so_faddr.s_addr);
++
++            n1 = ((laddr >> 24) & 0xff);
++            n2 = ((laddr >> 16) & 0xff);
++            n3 = ((laddr >> 8) & 0xff);
++            n4 = (laddr & 0xff);
++
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "ORT %d,%d,%d,%d,%d,%d\r\n%s",
++                                  n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); /* x == 7: trailing data was captured in buff */
++            return 1;
++        } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) {
++            /*
++             * Need to emulate the PASV response
++             */
++            x = sscanf(
++                bptr,
++                "27 Entering Passive Mode (%u,%u,%u,%u,%u,%u)\r\n%256[^\177]",
++                &n1, &n2, &n3, &n4, &n5, &n6, buff);
++            if (x < 6)
++                return 1;
++
++            laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4));
++            lport = htons((n5 << 8) | (n6));
++
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport,
++                                 SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            n6 = ntohs(so->so_fport);
++
++            n5 = (n6 >> 8) & 0xff;
++            n6 &= 0xff;
++
++            laddr = ntohl(so->so_faddr.s_addr);
++
++            n1 = ((laddr >> 24) & 0xff);
++            n2 = ((laddr >> 16) & 0xff);
++            n3 = ((laddr >> 8) & 0xff);
++            n4 = (laddr & 0xff);
++
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s",
++                                  n1, n2, n3, n4, n5, n6, x == 7 ? buff : "");
++            return 1;
++        }
++
++        return 1;
++
++    case EMU_KSH:
++        /*
++         * The kshell (Kerberos rsh) and shell services both pass
++         * a local port number to carry signals to the server
++         * and stderr to the client. It is passed at the beginning
++         * of the connection as a NUL-terminated decimal ASCII string.
++         */
++        so->so_emu = 0; /* one-shot: later data on this socket is not emulated */
++        for (lport = 0, i = 0; i < m->m_len - 1; ++i) {
++            if (m->m_data[i] < '0' || m->m_data[i] > '9')
++                return 1; /* invalid number */
++            lport *= 10;
++            lport += m->m_data[i] - '0';
++        }
++        if (m->m_data[m->m_len - 1] == '\0' && lport != 0 &&
++            (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr,
++                             htons(lport), SS_FACCEPTONCE)) != NULL)
++            m->m_len = slirp_fmt0(m->m_data, M_ROOM(m),
++                                  "%d", ntohs(so->so_fport));
++        return 1;
++
++    case EMU_IRC:
++        /*
++         * Need to emulate DCC CHAT, DCC SEND and DCC MOVE
++         */
++        m_inc(m, m->m_len + 1);
++        *(m->m_data + m->m_len) = 0; /* NUL terminate the string for strstr */
++        if ((bptr = (char *)strstr(m->m_data, "DCC")) == NULL)
++            return 1;
++
++        /* The %256s is for the broken mIRC */
++        if (sscanf(bptr, "DCC CHAT %256s %u %u", buff, &laddr, &lport) == 3) {
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr),
++                                 htons(lport), SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "DCC CHAT chat %lu %u%c\n",
++                                  (unsigned long)ntohl(so->so_faddr.s_addr),
++                                  ntohs(so->so_fport), 1);
++        } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport,
++                          &n1) == 4) {
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr),
++                                 htons(lport), SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "DCC SEND %s %lu %u %u%c\n", buff,
++                                  (unsigned long)ntohl(so->so_faddr.s_addr),
++                                  ntohs(so->so_fport), n1, 1);
++        } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport,
++                          &n1) == 4) {
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr),
++                                 htons(lport), SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "DCC MOVE %s %lu %u %u%c\n", buff,
++                                  (unsigned long)ntohl(so->so_faddr.s_addr),
++                                  ntohs(so->so_fport), n1, 1);
++        }
++        return 1;
++
++    case EMU_REALAUDIO:
++        /*
++         * RealAudio emulation - JP. We must try to parse the incoming
++         * data and try to find the two characters that contain the
++         * port number. Then we redirect an udp port and replace the
++         * number with the real port we got.
++         *
++         * The 1.0 beta versions of the player are not supported
++         * any more.
++         *
++         * A typical packet for player version 1.0 (release version):
++         *
++         * 0000:50 4E 41 00 05
++         * 0000:00 01 00 02 1B D7 00 00 67 E6 6C DC 63 00 12 50 ........g.l.c..P
++         * 0010:4E 43 4C 49 45 4E 54 20 31 30 31 20 41 4C 50 48 NCLIENT 101 ALPH
++         * 0020:41 6C 00 00 52 00 17 72 61 66 69 6C 65 73 2F 76 Al..R..rafiles/v
++         * 0030:6F 61 2F 65 6E 67 6C 69 73 68 5F 2E 72 61 79 42 oa/english_.rayB
++         *
++         * (two dumps above: the data arrived split across two packets)
++         * Now the port number 0x1BD7 is found at offset 0x04 of the
++         * second packet. This time we received five bytes first and
++         * then the rest. You never know how many bytes you get.
++         *
++         * A typical packet for player version 2.0 (beta):
++         *
++         * 0000:50 4E 41 00 06 00 02 00 00 00 01 00 02 1B C1 00 PNA.............
++         * 0010:00 67 75 78 F5 63 00 0A 57 69 6E 32 2E 30 2E 30 .gux.c..Win2.0.0
++         * 0020:2E 35 6C 00 00 52 00 1C 72 61 66 69 6C 65 73 2F .5l..R..rafiles/
++         * 0030:77 65 62 73 69 74 65 2F 32 30 72 65 6C 65 61 73 website/20releas
++         * 0040:65 2E 72 61 79 53 00 00 06 36 42 e.rayS...6B
++         *
++         * Port number 0x1BC1 is found at offset 0x0d.
++         *
++         * This is just a horrible switch statement. Variable ra tells
++         * us where we're going.
++         */
++
++        bptr = m->m_data;
++        while (bptr < m->m_data + m->m_len) {
++            uint16_t p;
++            static int ra = 0; /* function-static: parser position survives across calls and is shared by all sockets */
++            char ra_tbl[4];
++
++            ra_tbl[0] = 0x50;
++            ra_tbl[1] = 0x4e;
++            ra_tbl[2] = 0x41;
++            ra_tbl[3] = 0;
++
++            switch (ra) {
++            case 0:
++            case 2:
++            case 3:
++                if (*bptr++ != ra_tbl[ra]) {
++                    ra = 0;
++                    continue;
++                }
++                break;
++
++            case 1:
++                /*
++                 * We may get 0x50 several times, ignore them
++                 */
++                if (*bptr == 0x50) {
++                    ra = 1;
++                    bptr++;
++                    continue;
++                } else if (*bptr++ != ra_tbl[ra]) {
++                    ra = 0;
++                    continue;
++                }
++                break;
++
++            case 4:
++                /*
++                 * skip version number
++                 */
++                bptr++;
++                break;
++
++            case 5:
++                if (bptr == m->m_data + m->m_len - 1)
++                    return 1; /* We need two bytes */
++
++                /*
++                 * The difference between versions 1.0 and
++                 * 2.0 is here. For future versions of
++                 * the player this may need to be modified.
++                 */
++                if (*(bptr + 1) == 0x02)
++                    bptr += 8;
++                else
++                    bptr += 4;
++                break;
++
++            case 6:
++                /* This is the field containing the port
++                 * number that RA-player is listening to.
++                 */
++
++                if (bptr == m->m_data + m->m_len - 1)
++                    return 1; /* We need two bytes */
++
++                lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1];
++                if (lport < 6970)
++                    lport += 256; /* don't know why */
++                if (lport < 6970 || lport > 7170)
++                    return 1; /* failed */
++
++                /* try to get a udp port; the loop below probes 6970 .. 7070 */
++                for (p = 6970; p < 7071; p++) {
++                    if (udp_listen(slirp, INADDR_ANY, htons(p),
++                                   so->so_laddr.s_addr, htons(lport),
++                                   SS_FACCEPTONCE)) {
++                        break;
++                    }
++                }
++                if (p == 7071)
++                    p = 0; /* no port could be bound: a zero port is written back */
++                *(uint8_t *)bptr++ = (p >> 8) & 0xff;
++                *(uint8_t *)bptr = p & 0xff;
++                ra = 0;
++                return 1; /* port redirected, we're done */
++                break;
++
++            default:
++                ra = 0;
++            }
++            ra++;
++        }
++        return 1;
++
++    default:
++        /* Ooops, not emulated, won't call tcp_emu again */
++        so->so_emu = 0;
++        return 1;
++    }
++}
++
++/*
++ * Do misc. config of SLiRP while its running.
++ * Return 0 if this connections is to be closed, 1 otherwise, ++ * return 2 if this is a command-line connection ++ */ ++int tcp_ctl(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ struct sbuf *sb = &so->so_snd; ++ struct gfwd_list *ex_ptr; ++ ++ DEBUG_CALL("tcp_ctl"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* TODO: IPv6 */ ++ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { ++ /* Check if it's pty_exec */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == so->so_fport && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { ++ if (ex_ptr->write_cb) { ++ so->s = -1; ++ so->guestfwd = ex_ptr; ++ return 1; ++ } ++ DEBUG_MISC(" executing %s", ex_ptr->ex_exec); ++ if (ex_ptr->ex_unix) ++ return open_unix(so, ex_ptr->ex_unix); ++ else ++ return fork_exec(so, ex_ptr->ex_exec); ++ } ++ } ++ } ++ sb->sb_cc = slirp_fmt(sb->sb_wptr, sb->sb_datalen - (sb->sb_wptr - sb->sb_data), ++ "Error: No application configured.\r\n"); ++ sb->sb_wptr += sb->sb_cc; ++ return 0; ++} +diff --git a/slirp/src/tcp_timer.c b/slirp/src/tcp_timer.c +new file mode 100644 +index 0000000000..bc4db2d15e +--- /dev/null ++++ b/slirp/src/tcp_timer.c +@@ -0,0 +1,286 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 ++ * tcp_timer.c,v 1.2 1994/08/02 07:49:10 davidg Exp ++ */ ++ ++#include "slirp.h" ++ ++static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer); ++ ++/* ++ * Fast timeout routine for processing delayed acks ++ */ ++void tcp_fasttimo(Slirp *slirp) ++{ ++ register struct socket *so; ++ register struct tcpcb *tp; ++ ++ DEBUG_CALL("tcp_fasttimo"); ++ ++ so = slirp->tcb.so_next; ++ if (so) ++ for (; so != &slirp->tcb; so = so->so_next) ++ if ((tp = (struct tcpcb *)so->so_tcpcb) && ++ (tp->t_flags & TF_DELACK)) { ++ tp->t_flags &= ~TF_DELACK; ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ } ++} ++ ++/* ++ * Tcp protocol timeout routine called every 500 ms. ++ * Updates the timers in all active tcb's and ++ * causes finite state machine actions if timers expire. 
++ */
++void tcp_slowtimo(Slirp *slirp)
++{
++    register struct socket *ip, *ipnxt;
++    register struct tcpcb *tp;
++    register int i;
++
++    DEBUG_CALL("tcp_slowtimo");
++
++    /*
++     * Search through tcb's and update active timers.
++     */
++    ip = slirp->tcb.so_next;
++    if (ip == NULL) {
++        return;
++    }
++    for (; ip != &slirp->tcb; ip = ipnxt) {
++        ipnxt = ip->so_next; /* fetched up front; used below to detect that ip was unlinked */
++        tp = sototcpcb(ip);
++        if (tp == NULL) {
++            continue;
++        }
++        for (i = 0; i < TCPT_NTIMERS; i++) {
++            if (tp->t_timer[i] && --tp->t_timer[i] == 0) {
++                tcp_timers(tp, i);
++                if (ipnxt->so_prev != ip)
++                    goto tpgone; /* ip is no longer linked before ipnxt: do not touch tp again */
++            }
++        }
++        tp->t_idle++;
++        if (tp->t_rtt)
++            tp->t_rtt++;
++    tpgone:;
++    }
++    slirp->tcp_iss += TCP_ISSINCR / PR_SLOWHZ; /* increment iss */
++    slirp->tcp_now++; /* for timestamps */
++}
++
++/*
++ * Cancel all timers for TCP tp (every t_timer slot is set to 0).
++ */
++void tcp_canceltimers(struct tcpcb *tp)
++{
++    register int i;
++
++    for (i = 0; i < TCPT_NTIMERS; i++)
++        tp->t_timer[i] = 0;
++}
++
++const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64,
++                                               64, 64, 64, 64, 64, 64 }; /* multiplier indexed by t_rxtshift */
++
++/*
++ * TCP timer processing: handle expiry of timer `timer` on tp. Returns tp, or tcp_close()/tcp_drop()'s result when tp was closed.
++ */
++static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer)
++{
++    register int rexmt;
++
++    DEBUG_CALL("tcp_timers");
++
++    switch (timer) {
++    /*
++     * 2 MSL timeout in shutdown went off. If we're closed but
++     * still waiting for peer to close and connection has been idle
++     * too long, or if 2MSL time is up from TIME_WAIT, delete connection
++     * control block. Otherwise, check again in a bit.
++     */
++    case TCPT_2MSL:
++        if (tp->t_state != TCPS_TIME_WAIT && tp->t_idle <= TCP_MAXIDLE)
++            tp->t_timer[TCPT_2MSL] = TCPTV_KEEPINTVL;
++        else
++            tp = tcp_close(tp);
++        break;
++
++    /*
++     * Retransmission timer went off. Message has not
++     * been acked within retransmit interval. Back off
++     * to a longer retransmit interval and retransmit one segment.
++     */
++    case TCPT_REXMT:
++
++        /*
++         * XXXXX If a packet has timed out, then remove all the queued
++         * packets for that session.
++         */
++
++        if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
++            /*
++             * This is a hack to suit our terminal server here at the uni of
++             * canberra since they have trouble with zeroes... It usually lets
++             * them through unharmed, but under some conditions, it'll eat the
++             * zeros. If we keep retransmitting it, it'll keep eating the
++             * zeroes, so we keep retransmitting, and eventually the connection
++             * dies... (this only happens on incoming data)
++             *
++             * So, if we were gonna drop the connection from too many
++             * retransmits, don't... instead halve the t_maxseg, which might
++             * break up the NULLs and let them through
++             *
++             * *sigh*
++             */
++
++            tp->t_maxseg >>= 1;
++            if (tp->t_maxseg < 32) {
++                /*
++                 * We tried our best, now the connection must die!
++                 */
++                tp->t_rxtshift = TCP_MAXRXTSHIFT;
++                tp = tcp_drop(tp, tp->t_softerror);
++                /* tp->t_softerror : ETIMEDOUT); */ /* XXX */
++                return (tp); /* XXX */
++            }
++
++            /*
++             * Set rxtshift to 6, which is still at the maximum
++             * backoff time
++             */
++            tp->t_rxtshift = 6;
++        }
++        rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
++        TCPT_RANGESET(tp->t_rxtcur, rexmt, (short)tp->t_rttmin,
++                      TCPTV_REXMTMAX); /* XXX */
++        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
++        /*
++         * If losing, let the lower level know and try for
++         * a better route. Also, if we backed off this far,
++         * our srtt estimate is probably bogus. Clobber it
++         * so we'll take the next rtt measurement as our srtt;
++         * move the current srtt into rttvar to keep the current
++         * retransmit times until then.
++         */
++        if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
++            tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
++            tp->t_srtt = 0;
++        }
++        tp->snd_nxt = tp->snd_una; /* resend from the oldest unacknowledged byte */
++        /*
++         * If timing a segment in this window, stop the timer.
++         */
++        tp->t_rtt = 0;
++        /*
++         * Close the congestion window down to one segment
++         * (we'll open it by one segment for each ack we get).
++         * Since we probably have a window's worth of unacked
++         * data accumulated, this "slow start" keeps us from
++         * dumping all that data as back-to-back packets (which
++         * might overwhelm an intermediate gateway).
++         *
++         * There are two phases to the opening: Initially we
++         * open by one mss on each ack. This makes the window
++         * size increase exponentially with time. If the
++         * window is larger than the path can handle, this
++         * exponential growth results in dropped packet(s)
++         * almost immediately. To get more time between
++         * drops but still "push" the network to take advantage
++         * of improving conditions, we switch from exponential
++         * to linear window opening at some threshold size.
++         * For a threshold, we use half the current window
++         * size, truncated to a multiple of the mss.
++         *
++         * (the minimum cwnd that will give us exponential
++         * growth is 2 mss. We don't allow the threshold
++         * to go below this.)
++         */
++        {
++            unsigned win = MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
++            if (win < 2)
++                win = 2;
++            tp->snd_cwnd = tp->t_maxseg;
++            tp->snd_ssthresh = win * tp->t_maxseg;
++            tp->t_dupacks = 0;
++        }
++        tcp_output(tp);
++        break;
++
++    /*
++     * Persistence timer into zero window.
++     * Force a byte to be output, if possible.
++     */
++    case TCPT_PERSIST:
++        tcp_setpersist(tp);
++        tp->t_force = 1; /* set only for the duration of this tcp_output() call */
++        tcp_output(tp);
++        tp->t_force = 0;
++        break;
++
++    /*
++     * Keep-alive timer went off; send something
++     * or drop connection if idle for too long.
++     */
++    case TCPT_KEEP:
++        if (tp->t_state < TCPS_ESTABLISHED)
++            goto dropit;
++
++        if (slirp_do_keepalive && tp->t_state <= TCPS_CLOSE_WAIT) {
++            if (tp->t_idle >= TCPTV_KEEP_IDLE + TCP_MAXIDLE)
++                goto dropit;
++            /*
++             * Send a packet designed to force a response
++             * if the peer is up and reachable:
++             * either an ACK if the connection is still alive,
++             * or an RST if the peer has closed the connection
++             * due to timeout or reboot.
++             * Using sequence number tp->snd_una-1
++             * causes the transmitted zero-length segment
++             * to lie outside the receive window;
++             * by the protocol spec, this requires the
++             * correspondent TCP to respond.
++             */
++            tcp_respond(tp, &tp->t_template, (struct mbuf *)NULL, tp->rcv_nxt,
++                        tp->snd_una - 1, 0, tp->t_socket->so_ffamily);
++            tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL;
++        } else
++            tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE;
++        break;
++
++    dropit: /* reached only through the gotos in the TCPT_KEEP case */
++        tp = tcp_drop(tp, 0);
++        break;
++    }
++
++    return (tp);
++}
+diff --git a/slirp/src/tcp_timer.h b/slirp/src/tcp_timer.h
+new file mode 100644
+index 0000000000..584a5594e4
+--- /dev/null
++++ b/slirp/src/tcp_timer.h
+@@ -0,0 +1,130 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 1982, 1986, 1993
++ * The Regents of the University of California. All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * 2. Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in the
++ * documentation and/or other materials provided with the distribution.
++ * 3.
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 ++ * tcp_timer.h,v 1.4 1994/08/21 05:27:38 paul Exp ++ */ ++ ++#ifndef TCP_TIMER_H ++#define TCP_TIMER_H ++ ++/* ++ * Definitions of the TCP timers. These timers are counted ++ * down PR_SLOWHZ times a second. ++ */ ++#define TCPT_NTIMERS 4 ++ ++#define TCPT_REXMT 0 /* retransmit */ ++#define TCPT_PERSIST 1 /* retransmit persistence */ ++#define TCPT_KEEP 2 /* keep alive */ ++#define TCPT_2MSL 3 /* 2*msl quiet time timer */ ++ ++/* ++ * The TCPT_REXMT timer is used to force retransmissions. ++ * The TCP has the TCPT_REXMT timer set whenever segments ++ * have been sent for which ACKs are expected but not yet ++ * received. If an ACK is received which advances tp->snd_una, ++ * then the retransmit timer is cleared (if there are no more ++ * outstanding segments) or reset to the base value (if there ++ * are more ACKs expected). 
Whenever the retransmit timer goes off, ++ * we retransmit one unacknowledged segment, and do a backoff ++ * on the retransmit timer. ++ * ++ * The TCPT_PERSIST timer is used to keep window size information ++ * flowing even if the window goes shut. If all previous transmissions ++ * have been acknowledged (so that there are no retransmissions in progress), ++ * and the window is too small to bother sending anything, then we start ++ * the TCPT_PERSIST timer. When it expires, if the window is nonzero, ++ * we go to transmit state. Otherwise, at intervals send a single byte ++ * into the peer's window to force him to update our window information. ++ * We do this at most as often as TCPTV_PERSMIN time intervals, ++ * but no more frequently than the current estimate of round-trip ++ * packet time. The TCPT_PERSIST timer is cleared whenever we receive ++ * a window update from the peer. ++ * ++ * The TCPT_KEEP timer is used to keep connections alive. If a ++ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, ++ * but not yet established, then we drop the connection. Once the connection ++ * is established, if the connection is idle for TCPTV_KEEP_IDLE time ++ * (and keepalives have been enabled on the socket), we begin to probe ++ * the connection. We force the peer to send us a segment by sending: ++ * <SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK> ++ * This segment is (deliberately) outside the window, and should elicit ++ * an ack segment in response from the peer. If, despite the TCPT_KEEP ++ * initiated segments we cannot elicit a response from a peer in TCP_MAXIDLE ++ * amount of time probing, then we drop the connection. ++ */ ++ ++/* ++ * Time constants. ++ */ ++#define TCPTV_MSL (5 * PR_SLOWHZ) /* max seg lifetime (hah!)
*/ ++ ++#define TCPTV_SRTTBASE \ ++ 0 /* base roundtrip time; \ ++ if 0, no idea yet */ ++#define TCPTV_SRTTDFLT (3 * PR_SLOWHZ) /* assumed RTT if no info */ ++ ++#define TCPTV_PERSMIN (5 * PR_SLOWHZ) /* retransmit persistence */ ++#define TCPTV_PERSMAX (60 * PR_SLOWHZ) /* maximum persist interval */ ++ ++#define TCPTV_KEEP_INIT (75 * PR_SLOWHZ) /* initial connect keep alive */ ++#define TCPTV_KEEP_IDLE (120 * 60 * PR_SLOWHZ) /* dflt time before probing */ ++#define TCPTV_KEEPINTVL (75 * PR_SLOWHZ) /* default probe interval */ ++#define TCPTV_KEEPCNT 8 /* max probes before drop */ ++ ++#define TCPTV_MIN (1 * PR_SLOWHZ) /* minimum allowable value */ ++#define TCPTV_REXMTMAX (12 * PR_SLOWHZ) /* max allowable REXMT value */ ++ ++#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ ++ ++#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ ++ ++ ++/* ++ * Force a time value to be in a certain range. ++ */ ++#define TCPT_RANGESET(tv, value, tvmin, tvmax) \ ++ { \ ++ (tv) = (value); \ ++ if ((tv) < (tvmin)) \ ++ (tv) = (tvmin); \ ++ else if ((tv) > (tvmax)) \ ++ (tv) = (tvmax); \ ++ } ++ ++extern const int tcp_backoff[]; ++ ++struct tcpcb; ++ ++void tcp_fasttimo(Slirp *); ++void tcp_slowtimo(Slirp *); ++void tcp_canceltimers(struct tcpcb *); ++ ++#endif +diff --git a/slirp/src/tcp_var.h b/slirp/src/tcp_var.h +new file mode 100644 +index 0000000000..c8da8cbd16 +--- /dev/null ++++ b/slirp/src/tcp_var.h +@@ -0,0 +1,161 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993, 1994 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. 
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_var.h 8.3 (Berkeley) 4/10/94 ++ * tcp_var.h,v 1.3 1994/08/21 05:27:39 paul Exp ++ */ ++ ++#ifndef TCP_VAR_H ++#define TCP_VAR_H ++ ++#include "tcpip.h" ++#include "tcp_timer.h" ++ ++/* ++ * Tcp control block, one per tcp; fields: ++ */ ++struct tcpcb { ++ struct tcpiphdr *seg_next; /* sequencing queue */ ++ struct tcpiphdr *seg_prev; ++ short t_state; /* state of this connection */ ++ short t_timer[TCPT_NTIMERS]; /* tcp timers */ ++ short t_rxtshift; /* log(2) of rexmt exp. 
backoff */ ++ short t_rxtcur; /* current retransmit value */ ++ short t_dupacks; /* consecutive dup acks recd */ ++ uint16_t t_maxseg; /* maximum segment size */ ++ uint8_t t_force; /* 1 if forcing out a byte */ ++ uint16_t t_flags; ++#define TF_ACKNOW 0x0001 /* ack peer immediately */ ++#define TF_DELACK 0x0002 /* ack, but try to delay it */ ++#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ ++#define TF_NOOPT 0x0008 /* don't use tcp options */ ++#define TF_SENTFIN 0x0010 /* have sent FIN */ ++#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ ++#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ ++#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ ++#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ ++#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ ++ ++ struct tcpiphdr t_template; /* static skeletal packet for xmit */ ++ ++ struct socket *t_socket; /* back pointer to socket */ ++ /* ++ * The following fields are used as in the protocol specification. ++ * See RFC793, Sep. 1981, page 21. ++ */ ++ /* send sequence variables */ ++ tcp_seq snd_una; /* send unacknowledged */ ++ tcp_seq snd_nxt; /* send next */ ++ tcp_seq snd_up; /* send urgent pointer */ ++ tcp_seq snd_wl1; /* window update seg seq number */ ++ tcp_seq snd_wl2; /* window update seg ack number */ ++ tcp_seq iss; /* initial send sequence number */ ++ uint32_t snd_wnd; /* send window */ ++ /* receive sequence variables */ ++ uint32_t rcv_wnd; /* receive window */ ++ tcp_seq rcv_nxt; /* receive next */ ++ tcp_seq rcv_up; /* receive urgent pointer */ ++ tcp_seq irs; /* initial receive sequence number */ ++ /* ++ * Additional variables for this implementation.
++ */ ++ /* receive variables */ ++ tcp_seq rcv_adv; /* advertised window */ ++ /* retransmit variables */ ++ tcp_seq snd_max; /* highest sequence number sent; ++ * used to recognize retransmits ++ */ ++ /* congestion control (for slow start, source quench, retransmit after loss) ++ */ ++ uint32_t snd_cwnd; /* congestion-controlled window */ ++ uint32_t snd_ssthresh; /* snd_cwnd size threshold for ++ * slow start exponential to ++ * linear switch ++ */ ++ /* ++ * transmit timing stuff. See below for scale of srtt and rttvar. ++ * "Variance" is actually smoothed difference. ++ */ ++ short t_idle; /* inactivity time */ ++ short t_rtt; /* round trip time */ ++ tcp_seq t_rtseq; /* sequence number being timed */ ++ short t_srtt; /* smoothed round-trip time */ ++ short t_rttvar; /* variance in round-trip time */ ++ uint16_t t_rttmin; /* minimum rtt allowed */ ++ uint32_t max_sndwnd; /* largest window peer has offered */ ++ ++ /* out-of-band data */ ++ uint8_t t_oobflags; /* have some */ ++ uint8_t t_iobc; /* input character */ ++#define TCPOOB_HAVEDATA 0x01 ++#define TCPOOB_HADDATA 0x02 ++ short t_softerror; /* possible error not yet reported */ ++ ++ /* RFC 1323 variables */ ++ uint8_t snd_scale; /* window scaling for send window */ ++ uint8_t rcv_scale; /* window scaling for recv window */ ++ uint8_t request_r_scale; /* pending window scaling */ ++ uint8_t requested_s_scale; ++ uint32_t ts_recent; /* timestamp echo data */ ++ uint32_t ts_recent_age; /* when last updated */ ++ tcp_seq last_ack_sent; ++}; ++ ++#define sototcpcb(so) ((so)->so_tcpcb) ++ ++/* ++ * The smoothed round-trip time and estimated variance ++ * are stored as fixed point numbers scaled by the values below. ++ * For convenience, these scales are also used in smoothing the average ++ * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). ++ * With these scales, srtt has 3 bits to the right of the binary point, ++ * and thus an "ALPHA" of 0.875.
rttvar has 2 bits to the right of the ++ * binary point, and is smoothed with an ALPHA of 0.75. ++ */ ++#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */ ++#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */ ++#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */ ++#define TCP_RTTVAR_SHIFT 2 /* shift for rttvar; 2 bits */ ++ ++/* ++ * The initial retransmission should happen at rtt + 4 * rttvar. ++ * Because of the way we do the smoothing, srtt and rttvar ++ * will each average +1/2 tick of bias. When we compute ++ * the retransmit timer, we want 1/2 tick of rounding and ++ * 1 extra tick because of +-1/2 tick uncertainty in the ++ * firing of the timer. The bias will give us exactly the ++ * 1.5 tick we need. But, because the bias is ++ * statistical, we have to test that we don't drop below ++ * the minimum feasible timer (which is 2 ticks). ++ * This macro assumes that the value of TCP_RTTVAR_SCALE ++ * is the same as the multiplier for rttvar. ++ */ ++#define TCP_REXMTVAL(tp) (((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) ++ ++#endif +diff --git a/slirp/src/tcpip.h b/slirp/src/tcpip.h +new file mode 100644 +index 0000000000..a0fb2282f2 +--- /dev/null ++++ b/slirp/src/tcpip.h +@@ -0,0 +1,104 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3.
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcpip.h 8.1 (Berkeley) 6/10/93 ++ * tcpip.h,v 1.3 1994/08/21 05:27:40 paul Exp ++ */ ++ ++#ifndef TCPIP_H ++#define TCPIP_H ++ ++/* ++ * Tcp+ip header, after ip options removed. 
++ */ ++struct tcpiphdr { ++ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ ++ union { ++ struct { ++ struct in_addr ih_src; /* source internet address */ ++ struct in_addr ih_dst; /* destination internet address */ ++ uint8_t ih_x1; /* (unused) */ ++ uint8_t ih_pr; /* protocol */ ++ } ti_i4; ++ struct { ++ struct in6_addr ih_src; ++ struct in6_addr ih_dst; ++ uint8_t ih_x1; ++ uint8_t ih_nh; ++ } ti_i6; ++ } ti; ++ uint16_t ti_x0; ++ uint16_t ti_len; /* protocol length */ ++ struct tcphdr ti_t; /* tcp header */ ++}; ++#define ti_mbuf ih_mbuf.mptr ++#define ti_pr ti.ti_i4.ih_pr ++#define ti_src ti.ti_i4.ih_src ++#define ti_dst ti.ti_i4.ih_dst ++#define ti_src6 ti.ti_i6.ih_src ++#define ti_dst6 ti.ti_i6.ih_dst ++#define ti_nh6 ti.ti_i6.ih_nh ++#define ti_sport ti_t.th_sport ++#define ti_dport ti_t.th_dport ++#define ti_seq ti_t.th_seq ++#define ti_ack ti_t.th_ack ++#define ti_x2 ti_t.th_x2 ++#define ti_off ti_t.th_off ++#define ti_flags ti_t.th_flags ++#define ti_win ti_t.th_win ++#define ti_sum ti_t.th_sum ++#define ti_urp ti_t.th_urp ++ ++#define tcpiphdr2qlink(T) \ ++ ((struct qlink *)(((char *)(T)) - sizeof(struct qlink))) ++#define qlink2tcpiphdr(Q) \ ++ ((struct tcpiphdr *)(((char *)(Q)) + sizeof(struct qlink))) ++#define tcpiphdr_next(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->next) ++#define tcpiphdr_prev(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->prev) ++#define tcpfrag_list_first(T) qlink2tcpiphdr((T)->seg_next) ++#define tcpfrag_list_end(F, T) (tcpiphdr2qlink(F) == (struct qlink *)(T)) ++#define tcpfrag_list_empty(T) ((T)->seg_next == (struct tcpiphdr *)(T)) ++ ++/* This is the difference between the size of a tcpiphdr structure, and the ++ * size of actual ip+tcp headers, rounded up since we need to align data. 
*/ ++#define TCPIPHDR_DELTA \ ++ (MAX(0, ((int) sizeof(struct tcpiphdr) - (int) sizeof(struct ip) - \ ++ (int) sizeof(struct tcphdr) + 3) & \ ++ ~3)) ++ ++/* ++ * Just a clean way to get to the first byte ++ * of the packet ++ */ ++struct tcpiphdr_2 { ++ struct tcpiphdr dummy; ++ char first_char; ++}; ++ ++#endif +diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c +new file mode 100644 +index 0000000000..a19c889d34 +--- /dev/null ++++ b/slirp/src/tftp.c +@@ -0,0 +1,470 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * tftp.c - a simple, read-only tftp server for qemu ++ * ++ * Copyright (c) 2004 Magnus Damm ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++ ++#include "slirp.h" ++ ++#include <sys/types.h> ++#include <sys/stat.h> ++#include <fcntl.h> ++ ++static inline int tftp_session_in_use(struct tftp_session *spt) ++{ ++ return (spt->slirp != NULL); ++} ++ ++static inline void tftp_session_update(struct tftp_session *spt) ++{ ++ spt->timestamp = curtime; ++} ++ ++static void tftp_session_terminate(struct tftp_session *spt) ++{ ++ if (spt->fd >= 0) { ++ close(spt->fd); ++ spt->fd = -1; ++ } ++ g_free(spt->filename); ++ spt->slirp = NULL; ++} ++ ++static int tftp_session_allocate(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ struct tftp_session *spt; ++ int k; ++ ++ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { ++ spt = &slirp->tftp_sessions[k]; ++ ++ if (!tftp_session_in_use(spt)) ++ goto found; ++ ++ /* sessions time out after 5 inactive seconds */ ++ if ((int)(curtime - spt->timestamp) > 5000) { ++ tftp_session_terminate(spt); ++ goto found; ++ } ++ } ++ ++ return -1; ++ ++found: ++ memset(spt, 0, sizeof(*spt)); ++ memcpy(&spt->client_addr, srcsas, sockaddr_size(srcsas)); ++ spt->fd = -1; ++ spt->block_size = 512; ++ spt->client_port = hdr->udp.uh_sport; ++ spt->slirp = slirp; ++ ++ tftp_session_update(spt); ++ ++ return k; ++} ++ ++static int tftp_session_find(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ struct tftp_session *spt; ++ int k; ++ ++ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { ++ spt = &slirp->tftp_sessions[k]; ++ ++ if (tftp_session_in_use(spt)) { ++ if (sockaddr_equal(&spt->client_addr, srcsas)) { ++ if (spt->client_port == hdr->udp.uh_sport) { ++ return k; ++ } ++ } ++ } ++ } ++ ++ return -1; ++} ++ ++static int tftp_read_data(struct tftp_session *spt, uint32_t block_nr, ++ uint8_t *buf, int len) ++{ ++ int bytes_read = 0; ++ ++ if (spt->fd < 0) { ++ spt->fd = open(spt->filename, O_RDONLY | O_BINARY); ++ } ++ ++ if (spt->fd < 0) { ++ return -1; ++ } ++ ++ if (len) { ++ if (lseek(spt->fd, block_nr * spt->block_size, SEEK_SET) == (off_t)-1) { ++ return -1; ++ } ++ ++
bytes_read = read(spt->fd, buf, len); ++ } ++ ++ return bytes_read; ++} ++ ++static struct tftp_t *tftp_prep_mbuf_data(struct tftp_session *spt, ++ struct mbuf *m) ++{ ++ struct tftp_t *tp; ++ ++ memset(m->m_data, 0, m->m_size); ++ ++ m->m_data += IF_MAXLINKHDR; ++ if (spt->client_addr.ss_family == AF_INET6) { ++ m->m_data += sizeof(struct ip6); ++ } else { ++ m->m_data += sizeof(struct ip); ++ } ++ tp = (void *)m->m_data; ++ m->m_data += sizeof(struct udphdr); ++ ++ return tp; ++} ++ ++static void tftp_udp_output(struct tftp_session *spt, struct mbuf *m, ++ struct tftphdr *hdr) ++{ ++ if (spt->client_addr.ss_family == AF_INET6) { ++ struct sockaddr_in6 sa6, da6; ++ ++ sa6.sin6_addr = spt->slirp->vhost_addr6; ++ sa6.sin6_port = hdr->udp.uh_dport; ++ da6.sin6_addr = ((struct sockaddr_in6 *)&spt->client_addr)->sin6_addr; ++ da6.sin6_port = spt->client_port; ++ ++ udp6_output(NULL, m, &sa6, &da6); ++ } else { ++ struct sockaddr_in sa4, da4; ++ ++ sa4.sin_addr = spt->slirp->vhost_addr; ++ sa4.sin_port = hdr->udp.uh_dport; ++ da4.sin_addr = ((struct sockaddr_in *)&spt->client_addr)->sin_addr; ++ da4.sin_port = spt->client_port; ++ ++ udp_output(NULL, m, &sa4, &da4, IPTOS_LOWDELAY); ++ } ++} ++ ++static int tftp_send_oack(struct tftp_session *spt, const char *keys[], ++ uint32_t values[], int nb, struct tftp_t *recv_tp) ++{ ++ struct mbuf *m; ++ struct tftp_t *tp; ++ int i, n = 0; ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) ++ return -1; ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->hdr.tp_op = htons(TFTP_OACK); ++ for (i = 0; i < nb; i++) { ++ n += slirp_fmt0(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", keys[i]); ++ n += slirp_fmt0(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u", values[i]); ++ } ++ ++ m->m_len = G_SIZEOF_MEMBER(struct tftp_t, hdr.tp_op) + n; ++ tftp_udp_output(spt, m, &recv_tp->hdr); ++ ++ return 0; ++} ++ ++static void tftp_send_error(struct tftp_session *spt, uint16_t errorcode, ++ const char *msg, struct tftp_t *recv_tp) ++{ ++ struct mbuf 
*m; ++ struct tftp_t *tp; ++ ++ DEBUG_TFTP("tftp error msg: %s", msg); ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) { ++ goto out; ++ } ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->hdr.tp_op = htons(TFTP_ERROR); ++ tp->x.tp_error.tp_error_code = htons(errorcode); ++ slirp_pstrcpy((char *)tp->x.tp_error.tp_msg, sizeof(tp->x.tp_error.tp_msg), ++ msg); ++ ++ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + 3 + ++ strlen(msg) - sizeof(struct udphdr); ++ tftp_udp_output(spt, m, &recv_tp->hdr); ++ ++out: ++ tftp_session_terminate(spt); ++} ++ ++static void tftp_send_next_block(struct tftp_session *spt, ++ struct tftphdr *hdr) ++{ ++ struct mbuf *m; ++ struct tftp_t *tp; ++ int nobytes; ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) { ++ return; ++ } ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->hdr.tp_op = htons(TFTP_DATA); ++ tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff); ++ ++ nobytes = tftp_read_data(spt, spt->block_nr, tp->x.tp_data.tp_buf, ++ spt->block_size); ++ ++ if (nobytes < 0) { ++ m_free(m); ++ ++ /* send "file not found" error back */ ++ ++ tftp_send_error(spt, 1, "File not found", tp); ++ ++ return; ++ } ++ ++ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX - nobytes) - ++ sizeof(struct udphdr); ++ tftp_udp_output(spt, m, hdr); ++ ++ if (nobytes == spt->block_size) { ++ tftp_session_update(spt); ++ } else { ++ tftp_session_terminate(spt); ++ } ++ ++ spt->block_nr++; ++} ++ ++static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftp_t *tp, int pktlen) ++{ ++ struct tftp_session *spt; ++ int s, k; ++ size_t prefix_len; ++ char *req_fname; ++ const char *option_name[2]; ++ uint32_t option_value[2]; ++ int nb_options = 0; ++ ++ /* check if a session already exists and if so terminate it */ ++ s = tftp_session_find(slirp, srcsas, &tp->hdr); ++ if (s >= 0) { ++ tftp_session_terminate(&slirp->tftp_sessions[s]); ++ } ++ ++ s = tftp_session_allocate(slirp, srcsas, &tp->hdr); ++ ++ if (s < 0) { 
++ return; ++ } ++ ++ spt = &slirp->tftp_sessions[s]; ++ ++ /* unspecified prefix means service disabled */ ++ if (!slirp->tftp_prefix) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ /* skip header fields */ ++ k = 0; ++ pktlen -= offsetof(struct tftp_t, x.tp_buf); ++ ++ /* prepend tftp_prefix */ ++ prefix_len = strlen(slirp->tftp_prefix); ++ spt->filename = g_malloc(prefix_len + TFTP_FILENAME_MAX + 2); ++ memcpy(spt->filename, slirp->tftp_prefix, prefix_len); ++ spt->filename[prefix_len] = '/'; ++ ++ /* get name */ ++ req_fname = spt->filename + prefix_len + 1; ++ ++ while (1) { ++ if (k >= TFTP_FILENAME_MAX || k >= pktlen) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ req_fname[k] = tp->x.tp_buf[k]; ++ if (req_fname[k++] == '\0') { ++ break; ++ } ++ } ++ ++ DEBUG_TFTP("tftp rrq file: %s", req_fname); ++ ++ /* check mode */ ++ if ((pktlen - k) < 6) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ if (strcasecmp(&tp->x.tp_buf[k], "octet") != 0) { ++ tftp_send_error(spt, 4, "Unsupported transfer mode", tp); ++ return; ++ } ++ ++ k += 6; /* skipping octet */ ++ ++ /* do sanity checks on the filename */ ++ if ( ++#ifdef G_OS_WIN32 ++ strstr(req_fname, "..\\") || ++ req_fname[strlen(req_fname) - 1] == '\\' || ++#endif ++ strstr(req_fname, "../") || ++ req_fname[strlen(req_fname) - 1] == '/') { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ /* check if the file exists */ ++ if (tftp_read_data(spt, 0, NULL, 0) < 0) { ++ tftp_send_error(spt, 1, "File not found", tp); ++ return; ++ } ++ ++ if (tp->x.tp_buf[pktlen - 1] != 0) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ while (k < pktlen && nb_options < G_N_ELEMENTS(option_name)) { ++ const char *key, *value; ++ ++ key = &tp->x.tp_buf[k]; ++ k += strlen(key) + 1; ++ ++ if (k >= pktlen) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ value = &tp->x.tp_buf[k]; 
++ k += strlen(value) + 1; ++ ++ if (strcasecmp(key, "tsize") == 0) { ++ int tsize = atoi(value); ++ struct stat stat_p; ++ ++ if (tsize == 0) { ++ if (stat(spt->filename, &stat_p) == 0) ++ tsize = stat_p.st_size; ++ else { ++ tftp_send_error(spt, 1, "File not found", tp); ++ return; ++ } ++ } ++ ++ option_name[nb_options] = "tsize"; ++ option_value[nb_options] = tsize; ++ nb_options++; ++ } else if (strcasecmp(key, "blksize") == 0) { ++ int blksize = atoi(value); ++ ++ /* Accept blksize up to our maximum size */ ++ if (blksize > 0) { ++ spt->block_size = MIN(blksize, TFTP_BLOCKSIZE_MAX); ++ option_name[nb_options] = "blksize"; ++ option_value[nb_options] = spt->block_size; ++ nb_options++; ++ } ++ } ++ } ++ ++ if (nb_options > 0) { ++ assert(nb_options <= G_N_ELEMENTS(option_name)); ++ tftp_send_oack(spt, option_name, option_value, nb_options, tp); ++ return; ++ } ++ ++ spt->block_nr = 0; ++ tftp_send_next_block(spt, &tp->hdr); ++} ++ ++static void tftp_handle_ack(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ int s; ++ ++ s = tftp_session_find(slirp, srcsas, hdr); ++ ++ if (s < 0) { ++ return; ++ } ++ ++ tftp_send_next_block(&slirp->tftp_sessions[s], hdr); ++} ++ ++static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ int s; ++ ++ s = tftp_session_find(slirp, srcsas, hdr); ++ ++ if (s < 0) { ++ return; ++ } ++ ++ tftp_session_terminate(&slirp->tftp_sessions[s]); ++} ++ ++void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m) ++{ ++ struct tftphdr *hdr = mtod_check(m, sizeof(struct tftphdr)); ++ ++ if (hdr == NULL) { ++ return; ++ } ++ ++ switch (ntohs(hdr->tp_op)) { ++ case TFTP_RRQ: ++ tftp_handle_rrq(m->slirp, srcsas, ++ mtod(m, struct tftp_t *), ++ m->m_len); ++ break; ++ ++ case TFTP_ACK: ++ tftp_handle_ack(m->slirp, srcsas, hdr); ++ break; ++ ++ case TFTP_ERROR: ++ tftp_handle_error(m->slirp, srcsas, hdr); ++ break; ++ } ++} +diff --git a/slirp/src/tftp.h 
b/slirp/src/tftp.h +new file mode 100644 +index 0000000000..cafab03f2f +--- /dev/null ++++ b/slirp/src/tftp.h +@@ -0,0 +1,58 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* tftp defines */ ++ ++#ifndef SLIRP_TFTP_H ++#define SLIRP_TFTP_H ++ ++#include "util.h" ++ ++#define TFTP_SESSIONS_MAX 20 ++ ++#define TFTP_SERVER 69 ++ ++#define TFTP_RRQ 1 ++#define TFTP_WRQ 2 ++#define TFTP_DATA 3 ++#define TFTP_ACK 4 ++#define TFTP_ERROR 5 ++#define TFTP_OACK 6 ++ ++#define TFTP_FILENAME_MAX 512 ++#define TFTP_BLOCKSIZE_MAX 1428 ++ ++struct tftphdr { ++ struct udphdr udp; ++ uint16_t tp_op; ++} SLIRP_PACKED; ++ ++struct tftp_t { ++ struct tftphdr hdr; ++ union { ++ struct { ++ uint16_t tp_block_nr; ++ uint8_t tp_buf[TFTP_BLOCKSIZE_MAX]; ++ } tp_data; ++ struct { ++ uint16_t tp_error_code; ++ uint8_t tp_msg[TFTP_BLOCKSIZE_MAX]; ++ } tp_error; ++ char tp_buf[TFTP_BLOCKSIZE_MAX + 2]; ++ } x; ++} SLIRP_PACKED; ++ ++struct tftp_session { ++ Slirp *slirp; ++ char *filename; ++ int fd; ++ uint16_t block_size; ++ ++ struct sockaddr_storage client_addr; ++ uint16_t client_port; ++ uint32_t block_nr; ++ ++ int timestamp; ++}; ++ ++void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/udp.c b/slirp/src/udp.c +new file mode 100644 +index 0000000000..06b7b7d032 +--- /dev/null ++++ b/slirp/src/udp.c +@@ -0,0 +1,425 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. 
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94 ++ * udp_usrreq.c,v 1.4 1994/10/02 17:48:45 phk Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ * ++ * Please read the file COPYRIGHT for the ++ * terms and conditions of the copyright. 
++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++static uint8_t udp_tos(struct socket *so); ++ ++void udp_init(Slirp *slirp) ++{ ++ slirp->udb.so_next = slirp->udb.so_prev = &slirp->udb; ++ slirp->udp_last_so = &slirp->udb; ++} ++ ++void udp_cleanup(Slirp *slirp) ++{ ++ struct socket *so, *so_next; ++ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ so_next = so->so_next; ++ udp_detach(slirp->udb.so_next); ++ } ++} ++ ++/* m->m_data points at ip packet header ++ * m->m_len length ip packet ++ * ip->ip_len length data (IPDU) ++ */ ++void udp_input(register struct mbuf *m, int iphlen) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ register struct ip *ip; ++ register struct udphdr *uh; ++ int len; ++ struct ip save_ip; ++ struct socket *so; ++ struct sockaddr_storage lhost; ++ struct sockaddr_in *lhost4; ++ int ttl; ++ ++ DEBUG_CALL("udp_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("iphlen = %d", iphlen); ++ ++ /* ++ * Strip IP options, if any; should skip this, ++ * make available to user, and use on returned packets, ++ * but we don't yet have a way to check the checksum ++ * with options still present. ++ */ ++ if (iphlen > sizeof(struct ip)) { ++ ip_stripoptions(m, (struct mbuf *)0); ++ iphlen = sizeof(struct ip); ++ } ++ ++ /* ++ * Get IP and UDP header together in first mbuf. ++ */ ++ ip = mtod_check(m, iphlen + sizeof(struct udphdr)); ++ if (ip == NULL) { ++ goto bad; ++ } ++ uh = (struct udphdr *)((char *)ip + iphlen); ++ ++ /* ++ * Make mbuf data length reflect UDP length. ++ * If not enough data to reflect UDP length, drop. ++ */ ++ len = ntohs((uint16_t)uh->uh_ulen); ++ ++ if (ip->ip_len != len) { ++ if (len > ip->ip_len) { ++ goto bad; ++ } ++ m_adj(m, len - ip->ip_len); ++ ip->ip_len = len; ++ } ++ ++ /* ++ * Save a copy of the IP header in case we want to restore it ++ * for sending an ICMP error message in response.
++ */ ++ save_ip = *ip; ++ save_ip.ip_len += iphlen; /* tcp_input subtracts this */ ++ ++ /* ++ * Checksum extended UDP header and data. ++ */ ++ if (uh->uh_sum) { ++ memset(&((struct ipovly *)ip)->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ ((struct ipovly *)ip)->ih_x1 = 0; ++ ((struct ipovly *)ip)->ih_len = uh->uh_ulen; ++ if (cksum(m, len + sizeof(struct ip))) { ++ goto bad; ++ } ++ } ++ ++ lhost.ss_family = AF_INET; ++ lhost4 = (struct sockaddr_in *)&lhost; ++ lhost4->sin_addr = ip->ip_src; ++ lhost4->sin_port = uh->uh_sport; ++ ++ /* ++ * handle DHCP/BOOTP ++ */ ++ if (ntohs(uh->uh_dport) == BOOTP_SERVER && ++ (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || ++ ip->ip_dst.s_addr == 0xffffffff)) { ++ bootp_input(m); ++ goto bad; ++ } ++ ++ /* ++ * handle TFTP ++ */ ++ if (ntohs(uh->uh_dport) == TFTP_SERVER && ++ ip->ip_dst.s_addr == slirp->vhost_addr.s_addr) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ tftp_input(&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ if (slirp->restricted) { ++ goto bad; ++ } ++ ++ /* ++ * Locate pcb for datagram. ++ */ ++ so = solookup(&slirp->udp_last_so, &slirp->udb, &lhost, NULL); ++ ++ if (so == NULL) { ++ /* ++ * If there's no socket for this packet, ++ * create one ++ */ ++ so = socreate(slirp); ++ if (udp_attach(so, AF_INET) == -1) { ++ DEBUG_MISC(" udp_attach errno = %d-%s", errno, strerror(errno)); ++ sofree(so); ++ goto bad; ++ } ++ ++ /* ++ * Setup fields ++ */ ++ so->so_lfamily = AF_INET; ++ so->so_laddr = ip->ip_src; ++ so->so_lport = uh->uh_sport; ++ ++ if ((so->so_iptos = udp_tos(so)) == 0) ++ so->so_iptos = ip->ip_tos; ++ ++ /* ++ * XXXXX Here, check if it's in udpexec_list, ++ * and if it is, do the fork_exec() etc. 
++ */ ++ } ++ ++ so->so_ffamily = AF_INET; ++ so->so_faddr = ip->ip_dst; /* XXX */ ++ so->so_fport = uh->uh_dport; /* XXX */ ++ ++ iphlen += sizeof(struct udphdr); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ ++ /* ++ * Check for TTL ++ */ ++ ttl = save_ip.ip_ttl-1; ++ if (ttl <= 0) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp ttl exceeded"); ++ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, NULL); ++ goto bad; ++ } ++ setsockopt(so->s, IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)); ++ ++ /* ++ * Now we sendto() the packet. ++ */ ++ if (sosendto(so, m) == -1) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); ++ goto bad; ++ } ++ ++ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ ++ ++ /* restore the orig mbuf packet */ ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ so->so_m = m; /* ICMP backup */ ++ ++ return; ++bad: ++ m_free(m); ++} ++ ++int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, ++ struct sockaddr_in *daddr, int iptos) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, sizeof(struct udpiphdr)); ++ ++ register struct udpiphdr *ui; ++ int error = 0; ++ ++ DEBUG_CALL("udp_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("saddr = %s", inet_ntoa(saddr->sin_addr)); ++ DEBUG_ARG("daddr = %s", inet_ntoa(daddr->sin_addr)); ++ ++ /* ++ * Adjust for header ++ */ ++ m->m_data -= sizeof(struct udpiphdr); ++ m->m_len += sizeof(struct udpiphdr); ++ ++ /* ++ * Fill in mbuf with extended UDP header ++ * and addresses and length put into network format. 
++ */ ++ ui = mtod(m, struct udpiphdr *); ++ memset(&ui->ui_i.ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ ui->ui_x1 = 0; ++ ui->ui_pr = IPPROTO_UDP; ++ ui->ui_len = htons(m->m_len - sizeof(struct ip)); ++ /* XXXXX Check for from-one-location sockets, or from-any-location sockets ++ */ ++ ui->ui_src = saddr->sin_addr; ++ ui->ui_dst = daddr->sin_addr; ++ ui->ui_sport = saddr->sin_port; ++ ui->ui_dport = daddr->sin_port; ++ ui->ui_ulen = ui->ui_len; ++ ++ /* ++ * Stuff checksum and output datagram. ++ */ ++ ui->ui_sum = 0; ++ if ((ui->ui_sum = cksum(m, m->m_len)) == 0) ++ ui->ui_sum = 0xffff; ++ ((struct ip *)ui)->ip_len = m->m_len; ++ ++ ((struct ip *)ui)->ip_ttl = IPDEFTTL; ++ ((struct ip *)ui)->ip_tos = iptos; ++ ++ error = ip_output(so, m); ++ ++ return (error); ++} ++ ++int udp_attach(struct socket *so, unsigned short af) ++{ ++ so->s = slirp_socket(af, SOCK_DGRAM, 0); ++ if (so->s != -1) { ++ if (slirp_bind_outbound(so, af) != 0) { ++ // bind failed - close socket ++ closesocket(so->s); ++ so->s = -1; ++ return -1; ++ } ++ ++#ifdef __linux__ ++ { ++ int opt = 1; ++ switch (af) { ++ case AF_INET: ++ setsockopt(so->s, IPPROTO_IP, IP_RECVERR, &opt, sizeof(opt)); ++ break; ++ case AF_INET6: ++ setsockopt(so->s, IPPROTO_IPV6, IPV6_RECVERR, &opt, sizeof(opt)); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++#endif ++ ++ so->so_expire = curtime + SO_EXPIRE; ++ insque(so, &so->slirp->udb); ++ } ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ return (so->s); ++} ++ ++void udp_detach(struct socket *so) ++{ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++} ++ ++static const struct tos_t udptos[] = { { 0, 53, IPTOS_LOWDELAY, 0 }, /* DNS */ ++ { 0, 0, 0, 0 } }; ++ ++static uint8_t udp_tos(struct socket *so) ++{ ++ int i = 0; ++ ++ while (udptos[i].tos) { ++ if ((udptos[i].fport && ntohs(so->so_fport) == udptos[i].fport) || ++ (udptos[i].lport && ntohs(so->so_lport) == udptos[i].lport)) { ++ if 
(so->slirp->enable_emu) ++ so->so_emu = udptos[i].emu; ++ return udptos[i].tos; ++ } ++ i++; ++ } ++ ++ return 0; ++} ++ ++struct socket *udpx_listen(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags) ++{ ++ struct socket *so; ++ socklen_t addrlen; ++ int save_errno; ++ ++ so = socreate(slirp); ++ so->s = slirp_socket(haddr->sa_family, SOCK_DGRAM, 0); ++ if (so->s < 0) { ++ save_errno = errno; ++ sofree(so); ++ errno = save_errno; ++ return NULL; ++ } ++ if (haddr->sa_family == AF_INET6) ++ slirp_socket_set_v6only(so->s, (flags & SS_HOSTFWD_V6ONLY) != 0); ++ so->so_expire = curtime + SO_EXPIRE; ++ insque(so, &slirp->udb); ++ ++ if (bind(so->s, haddr, haddrlen) < 0) { ++ save_errno = errno; ++ udp_detach(so); ++ errno = save_errno; ++ return NULL; ++ } ++ slirp_socket_set_fast_reuse(so->s); ++ ++ addrlen = sizeof(so->fhost); ++ getsockname(so->s, &so->fhost.sa, &addrlen); ++ sotranslate_accept(so); ++ ++ sockaddr_copy(&so->lhost.sa, sizeof(so->lhost), laddr, laddrlen); ++ ++ if (flags != SS_FACCEPTONCE) ++ so->so_expire = 0; ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_ISFCONNECTED | flags; ++ ++ return so; ++} ++ ++struct socket *udp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, ++ uint32_t laddr, unsigned lport, int flags) ++{ ++ struct sockaddr_in hsa, lsa; ++ ++ memset(&hsa, 0, sizeof(hsa)); ++ hsa.sin_family = AF_INET; ++ hsa.sin_addr.s_addr = haddr; ++ hsa.sin_port = hport; ++ ++ memset(&lsa, 0, sizeof(lsa)); ++ lsa.sin_family = AF_INET; ++ lsa.sin_addr.s_addr = laddr; ++ lsa.sin_port = lport; ++ ++ return udpx_listen(slirp, (const struct sockaddr *) &hsa, sizeof(hsa), (struct sockaddr *) &lsa, sizeof(lsa), flags); ++} +diff --git a/slirp/src/udp.h b/slirp/src/udp.h +new file mode 100644 +index 0000000000..47f4ed34d8 +--- /dev/null ++++ b/slirp/src/udp.h +@@ -0,0 +1,96 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 
1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)udp.h 8.1 (Berkeley) 6/10/93 ++ * udp.h,v 1.3 1994/08/21 05:27:41 paul Exp ++ */ ++ ++#ifndef UDP_H ++#define UDP_H ++ ++#include "socket.h" ++ ++#define UDP_TTL 0x60 ++#define UDP_UDPDATALEN 16192 ++ ++/* ++ * Udp protocol header. ++ * Per RFC 768, September, 1981. 
++ */ ++struct udphdr { ++ uint16_t uh_sport; /* source port */ ++ uint16_t uh_dport; /* destination port */ ++ int16_t uh_ulen; /* udp length */ ++ uint16_t uh_sum; /* udp checksum */ ++}; ++ ++/* ++ * UDP kernel structures and variables. ++ */ ++struct udpiphdr { ++ struct ipovly ui_i; /* overlaid ip structure */ ++ struct udphdr ui_u; /* udp header */ ++}; ++#define ui_mbuf ui_i.ih_mbuf.mptr ++#define ui_x1 ui_i.ih_x1 ++#define ui_pr ui_i.ih_pr ++#define ui_len ui_i.ih_len ++#define ui_src ui_i.ih_src ++#define ui_dst ui_i.ih_dst ++#define ui_sport ui_u.uh_sport ++#define ui_dport ui_u.uh_dport ++#define ui_ulen ui_u.uh_ulen ++#define ui_sum ui_u.uh_sum ++ ++/* ++ * Names for UDP sysctl objects ++ */ ++#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */ ++#define UDPCTL_MAXID 2 ++ ++struct mbuf; ++ ++void udp_init(Slirp *); ++void udp_cleanup(Slirp *); ++void udp_input(register struct mbuf *, int); ++int udp_attach(struct socket *, unsigned short af); ++void udp_detach(struct socket *); ++struct socket *udp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); ++struct socket *udpx_listen(Slirp *, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags); ++int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, ++ struct sockaddr_in *daddr, int iptos); ++ ++void udp6_input(register struct mbuf *); ++int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, ++ struct sockaddr_in6 *daddr); ++ ++#endif +diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c +new file mode 100644 +index 0000000000..efeac5c19a +--- /dev/null ++++ b/slirp/src/udp6.c +@@ -0,0 +1,196 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron ++ */ ++ ++#include "slirp.h" ++#include "udp.h" ++#include "dhcpv6.h" ++ ++void udp6_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ struct ip6 *ip, 
save_ip; ++ struct udphdr *uh; ++ int iphlen = sizeof(struct ip6); ++ int len; ++ struct socket *so; ++ struct sockaddr_in6 lhost; ++ int hop_limit; ++ ++ DEBUG_CALL("udp6_input"); ++ DEBUG_ARG("m = %p", m); ++ ++ if (slirp->restricted) { ++ goto bad; ++ } ++ ++ ip = mtod(m, struct ip6 *); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ uh = mtod_check(m, sizeof(struct udphdr)); ++ if (uh == NULL) { ++ goto bad; ++ } ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ ++ if (ip6_cksum(m)) { ++ goto bad; ++ } ++ ++ len = ntohs((uint16_t)uh->uh_ulen); ++ ++ /* ++ * Make mbuf data length reflect UDP length. ++ * If not enough data to reflect UDP length, drop. ++ */ ++ if (ntohs(ip->ip_pl) != len) { ++ if (len > ntohs(ip->ip_pl)) { ++ goto bad; ++ } ++ m_adj(m, len - ntohs(ip->ip_pl)); ++ ip->ip_pl = htons(len); ++ } ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ ++ /* Locate pcb for datagram. */ ++ lhost.sin6_family = AF_INET6; ++ lhost.sin6_addr = ip->ip_src; ++ lhost.sin6_port = uh->uh_sport; ++ ++ /* handle DHCPv6 */ ++ if (ntohs(uh->uh_dport) == DHCPV6_SERVER_PORT && ++ (in6_equal(&ip->ip_dst, &slirp->vhost_addr6) || ++ in6_dhcp_multicast(&ip->ip_dst))) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ dhcpv6_input(&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ /* handle TFTP */ ++ if (ntohs(uh->uh_dport) == TFTP_SERVER && ++ !memcmp(ip->ip_dst.s6_addr, slirp->vhost_addr6.s6_addr, 16)) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ tftp_input((struct sockaddr_storage *)&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ so = solookup(&slirp->udp_last_so, &slirp->udb, ++ (struct sockaddr_storage *)&lhost, NULL); ++ ++ if (so == NULL) { ++ /* If there's no socket for this packet, create one. 
*/ ++ so = socreate(slirp); ++ if (udp_attach(so, AF_INET6) == -1) { ++ DEBUG_MISC(" udp6_attach errno = %d-%s", errno, strerror(errno)); ++ sofree(so); ++ goto bad; ++ } ++ ++ /* Setup fields */ ++ so->so_lfamily = AF_INET6; ++ so->so_laddr6 = ip->ip_src; ++ so->so_lport6 = uh->uh_sport; ++ } ++ ++ so->so_ffamily = AF_INET6; ++ so->so_faddr6 = ip->ip_dst; /* XXX */ ++ so->so_fport6 = uh->uh_dport; /* XXX */ ++ ++ iphlen += sizeof(struct udphdr); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ ++ /* ++ * Check for TTL ++ */ ++ hop_limit = save_ip.ip_hl-1; ++ if (hop_limit <= 0) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp ttl exceeded"); ++ icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); ++ goto bad; ++ } ++ setsockopt(so->s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &hop_limit, sizeof(hop_limit)); ++ ++ /* ++ * Now we sendto() the packet. ++ */ ++ if (sosendto(so, m) == -1) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); ++ icmp6_send_error(m, ICMP6_UNREACH, ICMP6_UNREACH_NO_ROUTE); ++ goto bad; ++ } ++ ++ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ ++ ++ /* restore the orig mbuf packet */ ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ so->so_m = m; ++ ++ return; ++bad: ++ m_free(m); ++} ++ ++int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, ++ struct sockaddr_in6 *daddr) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, sizeof(struct ip6) + sizeof(struct udphdr)); ++ ++ struct ip6 *ip; ++ struct udphdr *uh; ++ ++ DEBUG_CALL("udp6_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ /* adjust for header */ ++ m->m_data -= sizeof(struct udphdr); ++ m->m_len += sizeof(struct udphdr); ++ uh = mtod(m, struct udphdr *); ++ m->m_data -= sizeof(struct ip6); ++ m->m_len += sizeof(struct ip6); ++ ip = mtod(m, struct ip6 *); ++ ++ /* Build IP header */ 
++ ip->ip_pl = htons(m->m_len - sizeof(struct ip6)); ++ ip->ip_nh = IPPROTO_UDP; ++ ip->ip_src = saddr->sin6_addr; ++ ip->ip_dst = daddr->sin6_addr; ++ ++ /* Build UDP header */ ++ uh->uh_sport = saddr->sin6_port; ++ uh->uh_dport = daddr->sin6_port; ++ uh->uh_ulen = ip->ip_pl; ++ uh->uh_sum = 0; ++ uh->uh_sum = ip6_cksum(m); ++ if (uh->uh_sum == 0) { ++ uh->uh_sum = 0xffff; ++ } ++ ++ return ip6_output(so, m, 0); ++} +diff --git a/slirp/src/util.c b/slirp/src/util.c +new file mode 100644 +index 0000000000..e6bccbe0fa +--- /dev/null ++++ b/slirp/src/util.c +@@ -0,0 +1,441 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * util.c (mostly based on QEMU os-win32.c) ++ * ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * Copyright (c) 2010-2016 Red Hat, Inc. ++ * ++ * QEMU library functions for win32 which are shared between QEMU and ++ * the QEMU tools. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "util.h" ++ ++#include ++#include ++#include ++ ++#if defined(_WIN32) ++int slirp_inet_aton(const char *cp, struct in_addr *ia) ++{ ++ uint32_t addr = inet_addr(cp); ++ if (addr == 0xffffffff) { ++ return 0; ++ } ++ ia->s_addr = addr; ++ return 1; ++} ++#endif ++ ++void slirp_set_nonblock(int fd) ++{ ++#ifndef _WIN32 ++ int f; ++ f = fcntl(fd, F_GETFL); ++ assert(f != -1); ++ f = fcntl(fd, F_SETFL, f | O_NONBLOCK); ++ assert(f != -1); ++#else ++ unsigned long opt = 1; ++ ioctlsocket(fd, FIONBIO, &opt); ++#endif ++} ++ ++static void slirp_set_cloexec(int fd) ++{ ++#ifndef _WIN32 ++ int f; ++ f = fcntl(fd, F_GETFD); ++ assert(f != -1); ++ f = fcntl(fd, F_SETFD, f | FD_CLOEXEC); ++ assert(f != -1); ++#endif ++} ++ ++/* ++ * Opens a socket with FD_CLOEXEC set ++ * On failure errno contains the reason. ++ */ ++int slirp_socket(int domain, int type, int protocol) ++{ ++ int ret; ++ ++#ifdef SOCK_CLOEXEC ++ ret = socket(domain, type | SOCK_CLOEXEC, protocol); ++ if (ret != -1 || errno != EINVAL) { ++ return ret; ++ } ++#endif ++ ret = socket(domain, type, protocol); ++ if (ret >= 0) { ++ slirp_set_cloexec(ret); ++ } ++ ++ return ret; ++} ++ ++#ifdef _WIN32 ++static int socket_error(void) ++{ ++ switch (WSAGetLastError()) { ++ case 0: ++ return 0; ++ case WSAEINTR: ++ return EINTR; ++ case WSAEINVAL: ++ return EINVAL; ++ case WSA_INVALID_HANDLE: ++ return EBADF; ++ case WSA_NOT_ENOUGH_MEMORY: ++ return ENOMEM; ++ case WSA_INVALID_PARAMETER: ++ return EINVAL; ++ case WSAENAMETOOLONG: ++ return ENAMETOOLONG; ++ case WSAENOTEMPTY: ++ return ENOTEMPTY; ++ case WSAEWOULDBLOCK: ++ /* not using EWOULDBLOCK as we don't want code to have ++ * to check both EWOULDBLOCK and EAGAIN */ ++ return EAGAIN; ++ case WSAEINPROGRESS: ++ return EINPROGRESS; ++ case WSAEALREADY: ++ return EALREADY; ++ case WSAENOTSOCK: ++ return ENOTSOCK; ++ case WSAEDESTADDRREQ: ++ return EDESTADDRREQ; ++ case WSAEMSGSIZE: ++ return EMSGSIZE; ++ case WSAEPROTOTYPE: ++ return EPROTOTYPE; ++ 
case WSAENOPROTOOPT: ++ return ENOPROTOOPT; ++ case WSAEPROTONOSUPPORT: ++ return EPROTONOSUPPORT; ++ case WSAEOPNOTSUPP: ++ return EOPNOTSUPP; ++ case WSAEAFNOSUPPORT: ++ return EAFNOSUPPORT; ++ case WSAEADDRINUSE: ++ return EADDRINUSE; ++ case WSAEADDRNOTAVAIL: ++ return EADDRNOTAVAIL; ++ case WSAENETDOWN: ++ return ENETDOWN; ++ case WSAENETUNREACH: ++ return ENETUNREACH; ++ case WSAENETRESET: ++ return ENETRESET; ++ case WSAECONNABORTED: ++ return ECONNABORTED; ++ case WSAECONNRESET: ++ return ECONNRESET; ++ case WSAENOBUFS: ++ return ENOBUFS; ++ case WSAEISCONN: ++ return EISCONN; ++ case WSAENOTCONN: ++ return ENOTCONN; ++ case WSAETIMEDOUT: ++ return ETIMEDOUT; ++ case WSAECONNREFUSED: ++ return ECONNREFUSED; ++ case WSAELOOP: ++ return ELOOP; ++ case WSAEHOSTUNREACH: ++ return EHOSTUNREACH; ++ default: ++ return EIO; ++ } ++} ++ ++#undef ioctlsocket ++int slirp_ioctlsocket_wrap(int fd, int req, void *val) ++{ ++ int ret; ++ ret = ioctlsocket(fd, req, val); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef closesocket ++int slirp_closesocket_wrap(int fd) ++{ ++ int ret; ++ ret = closesocket(fd); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef connect ++int slirp_connect_wrap(int sockfd, const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = connect(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef listen ++int slirp_listen_wrap(int sockfd, int backlog) ++{ ++ int ret; ++ ret = listen(sockfd, backlog); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef bind ++int slirp_bind_wrap(int sockfd, const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = bind(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef socket ++int slirp_socket_wrap(int domain, int type, int protocol) ++{ ++ int ret; ++ ret = socket(domain, type, protocol); ++ if (ret < 0) 
{ ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef accept ++int slirp_accept_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = accept(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef shutdown ++int slirp_shutdown_wrap(int sockfd, int how) ++{ ++ int ret; ++ ret = shutdown(sockfd, how); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getsockopt ++int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, ++ int *optlen) ++{ ++ int ret; ++ ret = getsockopt(sockfd, level, optname, optval, optlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef setsockopt ++int slirp_setsockopt_wrap(int sockfd, int level, int optname, ++ const void *optval, int optlen) ++{ ++ int ret; ++ ret = setsockopt(sockfd, level, optname, optval, optlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getpeername ++int slirp_getpeername_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = getpeername(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getsockname ++int slirp_getsockname_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = getsockname(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef send ++ssize_t slirp_send_wrap(int sockfd, const void *buf, size_t len, int flags) ++{ ++ int ret; ++ ret = send(sockfd, buf, len, flags); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef sendto ++ssize_t slirp_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, ++ const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = sendto(sockfd, buf, len, flags, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef recv ++ssize_t 
slirp_recv_wrap(int sockfd, void *buf, size_t len, int flags) ++{ ++ int ret; ++ ret = recv(sockfd, buf, len, flags); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef recvfrom ++ssize_t slirp_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, ++ struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = recvfrom(sockfd, buf, len, flags, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++#endif /* WIN32 */ ++ ++void slirp_pstrcpy(char *buf, int buf_size, const char *str) ++{ ++ int c; ++ char *q = buf; ++ ++ if (buf_size <= 0) ++ return; ++ ++ for (;;) { ++ c = *str++; ++ if (c == 0 || q >= buf + buf_size - 1) ++ break; ++ *q++ = c; ++ } ++ *q = '\0'; ++} ++ ++G_GNUC_PRINTF(3, 0) ++static int slirp_vsnprintf(char *str, size_t size, ++ const char *format, va_list args) ++{ ++ int rv = g_vsnprintf(str, size, format, args); ++ ++ if (rv < 0) { ++ g_error("g_vsnprintf() failed: %s", g_strerror(errno)); ++ } ++ ++ return rv; ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - returns the number of bytes written (excluding optional \0-ending) ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt(char *str, size_t size, const char *format, ...) ++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv >= size) { ++ g_critical("slirp_fmt() truncation"); ++ } ++ ++ return MIN(rv, size); ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - always \0-end (unless size == 0) ++ * - returns the number of bytes actually written, including \0 ending ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt0(char *str, size_t size, const char *format, ...) 
++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv >= size) { ++ g_critical("slirp_fmt0() truncation"); ++ if (size > 0) ++ str[size - 1] = '\0'; ++ rv = size; ++ } else { ++ rv += 1; /* include \0 */ ++ } ++ ++ return rv; ++} ++ ++const char *slirp_ether_ntoa(const uint8_t *addr, char *out_str, ++ size_t out_str_size) ++{ ++ assert(out_str_size >= ETH_ADDRSTRLEN); ++ ++ slirp_fmt0(out_str, out_str_size, "%02x:%02x:%02x:%02x:%02x:%02x", ++ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); ++ ++ return out_str; ++} +diff --git a/slirp/src/util.h b/slirp/src/util.h +new file mode 100644 +index 0000000000..07654ecf37 +--- /dev/null ++++ b/slirp/src/util.h +@@ -0,0 +1,203 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * Copyright (c) 2010-2019 Red Hat, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#ifndef UTIL_H_ ++#define UTIL_H_ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef _WIN32 ++#include ++#include ++#include ++#else ++#include ++#include ++#include ++#endif ++ ++#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) ++#define SLIRP_PACKED __attribute__((gcc_struct, packed)) ++#else ++#define SLIRP_PACKED __attribute__((packed)) ++#endif ++ ++#ifndef DIV_ROUND_UP ++#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) ++#endif ++ ++#ifndef container_of ++#define container_of(ptr, type, member) \ ++ __extension__({ \ ++ void *__mptr = (void *)(ptr); \ ++ ((type *)(__mptr - offsetof(type, member))); \ ++ }) ++#endif ++ ++#ifndef G_SIZEOF_MEMBER ++#define G_SIZEOF_MEMBER(type, member) sizeof(((type *)0)->member) ++#endif ++ ++#if defined(_WIN32) /* CONFIG_IOVEC */ ++#if !defined(IOV_MAX) /* XXX: to avoid duplicate with QEMU osdep.h */ ++struct iovec { ++ void *iov_base; ++ size_t iov_len; ++}; ++#endif ++#else ++#include ++#endif ++ ++#define stringify(s) tostring(s) ++#define tostring(s) #s ++ ++#define SCALE_MS 1000000 ++ ++#define ETH_ALEN 6 ++#define ETH_ADDRSTRLEN 18 /* "xx:xx:xx:xx:xx:xx", with trailing NUL */ ++#define ETH_HLEN 14 ++#define ETH_P_IP (0x0800) /* Internet Protocol packet */ ++#define ETH_P_ARP (0x0806) /* Address Resolution packet */ ++#define ETH_P_IPV6 (0x86dd) ++#define ETH_P_VLAN (0x8100) ++#define ETH_P_DVLAN (0x88a8) ++#define ETH_P_NCSI (0x88f8) ++#define ETH_P_UNKNOWN (0xffff) ++ ++/* FIXME: remove me when made standalone */ ++#ifdef _WIN32 ++#undef accept ++#undef bind ++#undef closesocket ++#undef connect ++#undef getpeername ++#undef getsockname ++#undef 
getsockopt ++#undef ioctlsocket ++#undef listen ++#undef recv ++#undef recvfrom ++#undef send ++#undef sendto ++#undef setsockopt ++#undef shutdown ++#undef socket ++#endif ++ ++#ifdef _WIN32 ++#define connect slirp_connect_wrap ++int slirp_connect_wrap(int fd, const struct sockaddr *addr, int addrlen); ++#define listen slirp_listen_wrap ++int slirp_listen_wrap(int fd, int backlog); ++#define bind slirp_bind_wrap ++int slirp_bind_wrap(int fd, const struct sockaddr *addr, int addrlen); ++#define socket slirp_socket_wrap ++int slirp_socket_wrap(int domain, int type, int protocol); ++#define accept slirp_accept_wrap ++int slirp_accept_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define shutdown slirp_shutdown_wrap ++int slirp_shutdown_wrap(int fd, int how); ++#define getpeername slirp_getpeername_wrap ++int slirp_getpeername_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define getsockname slirp_getsockname_wrap ++int slirp_getsockname_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define send slirp_send_wrap ++ssize_t slirp_send_wrap(int fd, const void *buf, size_t len, int flags); ++#define sendto slirp_sendto_wrap ++ssize_t slirp_sendto_wrap(int fd, const void *buf, size_t len, int flags, ++ const struct sockaddr *dest_addr, int addrlen); ++#define recv slirp_recv_wrap ++ssize_t slirp_recv_wrap(int fd, void *buf, size_t len, int flags); ++#define recvfrom slirp_recvfrom_wrap ++ssize_t slirp_recvfrom_wrap(int fd, void *buf, size_t len, int flags, ++ struct sockaddr *src_addr, int *addrlen); ++#define closesocket slirp_closesocket_wrap ++int slirp_closesocket_wrap(int fd); ++#define ioctlsocket slirp_ioctlsocket_wrap ++int slirp_ioctlsocket_wrap(int fd, int req, void *val); ++#define getsockopt slirp_getsockopt_wrap ++int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, ++ int *optlen); ++#define setsockopt slirp_setsockopt_wrap ++int slirp_setsockopt_wrap(int sockfd, int level, int optname, ++ const void *optval, int 
optlen); ++#define inet_aton slirp_inet_aton ++int slirp_inet_aton(const char *cp, struct in_addr *ia); ++#else ++#define closesocket(s) close(s) ++#define ioctlsocket(s, r, v) ioctl(s, r, v) ++#endif ++ ++int slirp_socket(int domain, int type, int protocol); ++void slirp_set_nonblock(int fd); ++ ++static inline int slirp_socket_set_v6only(int fd, int v) ++{ ++ return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)); ++} ++ ++static inline int slirp_socket_set_nodelay(int fd) ++{ ++ int v = 1; ++ return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); ++} ++ ++static inline int slirp_socket_set_fast_reuse(int fd) ++{ ++#ifndef _WIN32 ++ int v = 1; ++ return setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &v, sizeof(v)); ++#else ++ /* Enabling the reuse of an endpoint that was used by a socket still in ++ * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows ++ * fast reuse is the default and SO_REUSEADDR does strange things. So we ++ * don't have to do anything here. More info can be found at: ++ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ ++ return 0; ++#endif ++} ++ ++void slirp_pstrcpy(char *buf, int buf_size, const char *str); ++ ++int slirp_fmt(char *str, size_t size, const char *format, ...) G_GNUC_PRINTF(3, 4); ++int slirp_fmt0(char *str, size_t size, const char *format, ...) G_GNUC_PRINTF(3, 4); ++ ++/* ++ * Pretty print a MAC address into out_str. ++ * As a convenience returns out_str. 
++ */ ++const char *slirp_ether_ntoa(const uint8_t *addr, char *out_str, ++ size_t out_str_len); ++ ++#endif +diff --git a/slirp/src/version.c b/slirp/src/version.c +new file mode 100644 +index 0000000000..93e0be9c24 +--- /dev/null ++++ b/slirp/src/version.c +@@ -0,0 +1,8 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#include "libslirp.h" ++ ++const char * ++slirp_version_string(void) ++{ ++ return SLIRP_VERSION_STRING; ++} +diff --git a/slirp/src/vmstate.c b/slirp/src/vmstate.c +new file mode 100644 +index 0000000000..68cc1729c5 +--- /dev/null ++++ b/slirp/src/vmstate.c +@@ -0,0 +1,444 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * VMState interpreter ++ * ++ * Copyright (c) 2009-2018 Red Hat Inc ++ * ++ * Authors: ++ * Juan Quintela ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#include ++#include ++#include ++#include ++ ++#include "stream.h" ++#include "vmstate.h" ++ ++static int get_nullptr(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ if (slirp_istream_read_u8(f) == VMS_NULLPTR_MARKER) { ++ return 0; ++ } ++ g_warning("vmstate: get_nullptr expected VMS_NULLPTR_MARKER"); ++ return -EINVAL; ++} ++ ++static int put_nullptr(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++ ++{ ++ if (pv == NULL) { ++ slirp_ostream_write_u8(f, VMS_NULLPTR_MARKER); ++ return 0; ++ } ++ g_warning("vmstate: put_nullptr must be called with pv == NULL"); ++ return -EINVAL; ++} ++ ++const VMStateInfo slirp_vmstate_info_nullptr = { ++ .name = "uint64", ++ .get = get_nullptr, ++ .put = put_nullptr, ++}; ++ ++/* 8 bit unsigned int */ ++ ++static int get_uint8(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint8_t *v = pv; ++ *v = slirp_istream_read_u8(f); ++ return 0; ++} ++ ++static int put_uint8(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint8_t *v = pv; ++ slirp_ostream_write_u8(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint8 = { ++ .name = "uint8", ++ .get = get_uint8, ++ .put = put_uint8, ++}; ++ ++/* 16 bit unsigned int */ ++ ++static int get_uint16(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint16_t *v = pv; ++ *v = 
slirp_istream_read_u16(f); ++ return 0; ++} ++ ++static int put_uint16(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint16_t *v = pv; ++ slirp_ostream_write_u16(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint16 = { ++ .name = "uint16", ++ .get = get_uint16, ++ .put = put_uint16, ++}; ++ ++/* 32 bit unsigned int */ ++ ++static int get_uint32(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint32_t *v = pv; ++ *v = slirp_istream_read_u32(f); ++ return 0; ++} ++ ++static int put_uint32(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint32_t *v = pv; ++ slirp_ostream_write_u32(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint32 = { ++ .name = "uint32", ++ .get = get_uint32, ++ .put = put_uint32, ++}; ++ ++/* 16 bit int */ ++ ++static int get_int16(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int16_t *v = pv; ++ *v = slirp_istream_read_i16(f); ++ return 0; ++} ++ ++static int put_int16(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int16_t *v = pv; ++ slirp_ostream_write_i16(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_int16 = { ++ .name = "int16", ++ .get = get_int16, ++ .put = put_int16, ++}; ++ ++/* 32 bit int */ ++ ++static int get_int32(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int32_t *v = pv; ++ *v = slirp_istream_read_i32(f); ++ return 0; ++} ++ ++static int put_int32(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int32_t *v = pv; ++ slirp_ostream_write_i32(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_int32 = { ++ .name = "int32", ++ .get = get_int32, ++ .put = put_int32, ++}; ++ ++/* vmstate_info_tmp, see VMSTATE_WITH_TMP, the idea is that we allocate ++ * a temporary buffer and the pre_load/pre_save methods in the child vmsd ++ * copy stuff from the parent 
into the child and do calculations to fill ++ * in fields that don't really exist in the parent but need to be in the ++ * stream. ++ */ ++static int get_tmp(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int ret; ++ const VMStateDescription *vmsd = field->vmsd; ++ int version_id = field->version_id; ++ void *tmp = g_malloc(size); ++ ++ /* Writes the parent field which is at the start of the tmp */ ++ *(void **)tmp = pv; ++ ret = slirp_vmstate_load_state(f, vmsd, tmp, version_id); ++ g_free(tmp); ++ return ret; ++} ++ ++static int put_tmp(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ const VMStateDescription *vmsd = field->vmsd; ++ void *tmp = g_malloc(size); ++ int ret; ++ ++ /* Writes the parent field which is at the start of the tmp */ ++ *(void **)tmp = pv; ++ ret = slirp_vmstate_save_state(f, vmsd, tmp); ++ g_free(tmp); ++ ++ return ret; ++} ++ ++const VMStateInfo slirp_vmstate_info_tmp = { ++ .name = "tmp", ++ .get = get_tmp, ++ .put = put_tmp, ++}; ++ ++/* uint8_t buffers */ ++ ++static int get_buffer(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ slirp_istream_read(f, pv, size); ++ return 0; ++} ++ ++static int put_buffer(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ slirp_ostream_write(f, pv, size); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_buffer = { ++ .name = "buffer", ++ .get = get_buffer, ++ .put = put_buffer, ++}; ++ ++static int vmstate_n_elems(void *opaque, const VMStateField *field) ++{ ++ int n_elems = 1; ++ ++ if (field->flags & VMS_ARRAY) { ++ n_elems = field->num; ++ } else if (field->flags & VMS_VARRAY_INT32) { ++ n_elems = *(int32_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT32) { ++ n_elems = *(uint32_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT16) { ++ n_elems = *(uint16_t *)(opaque + field->num_offset); ++ } else if (field->flags & 
VMS_VARRAY_UINT8) { ++ n_elems = *(uint8_t *)(opaque + field->num_offset); ++ } ++ ++ if (field->flags & VMS_MULTIPLY_ELEMENTS) { ++ n_elems *= field->num; ++ } ++ ++ return n_elems; ++} ++ ++static int vmstate_size(void *opaque, const VMStateField *field) ++{ ++ int size = field->size; ++ ++ if (field->flags & VMS_VBUFFER) { ++ size = *(int32_t *)(opaque + field->size_offset); ++ if (field->flags & VMS_MULTIPLY) { ++ size *= field->size; ++ } ++ } ++ ++ return size; ++} ++ ++static int vmstate_save_state_v(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id) ++{ ++ int ret = 0; ++ const VMStateField *field = vmsd->fields; ++ ++ if (vmsd->pre_save) { ++ ret = vmsd->pre_save(opaque); ++ if (ret) { ++ g_warning("pre-save failed: %s", vmsd->name); ++ return ret; ++ } ++ } ++ ++ while (field->name) { ++ if ((field->field_exists && field->field_exists(opaque, version_id)) || ++ (!field->field_exists && field->version_id <= version_id)) { ++ void *first_elem = opaque + field->offset; ++ int i, n_elems = vmstate_n_elems(opaque, field); ++ int size = vmstate_size(opaque, field); ++ ++ if (field->flags & VMS_POINTER) { ++ first_elem = *(void **)first_elem; ++ assert(first_elem || !n_elems || !size); ++ } ++ for (i = 0; i < n_elems; i++) { ++ void *curr_elem = first_elem + size * i; ++ ++ if (field->flags & VMS_ARRAY_OF_POINTER) { ++ assert(curr_elem); ++ curr_elem = *(void **)curr_elem; ++ } ++ if (!curr_elem && size) { ++ /* if null pointer write placeholder and do not follow */ ++ assert(field->flags & VMS_ARRAY_OF_POINTER); ++ ret = slirp_vmstate_info_nullptr.put(f, curr_elem, size, ++ NULL); ++ } else if (field->flags & VMS_STRUCT) { ++ ret = slirp_vmstate_save_state(f, field->vmsd, curr_elem); ++ } else if (field->flags & VMS_VSTRUCT) { ++ ret = vmstate_save_state_v(f, field->vmsd, curr_elem, ++ field->struct_version_id); ++ } else { ++ ret = field->info->put(f, curr_elem, size, field); ++ } ++ if (ret) { ++ g_warning("Save of field %s/%s 
failed", vmsd->name, ++ field->name); ++ return ret; ++ } ++ } ++ } else { ++ if (field->flags & VMS_MUST_EXIST) { ++ g_warning("Output state validation failed: %s/%s", vmsd->name, ++ field->name); ++ assert(!(field->flags & VMS_MUST_EXIST)); ++ } ++ } ++ field++; ++ } ++ ++ return 0; ++} ++ ++int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque) ++{ ++ return vmstate_save_state_v(f, vmsd, opaque, vmsd->version_id); ++} ++ ++static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque) ++{ ++ if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) { ++ size_t size = vmstate_size(opaque, field); ++ size *= vmstate_n_elems(opaque, field); ++ if (size) { ++ *(void **)ptr = g_malloc(size); ++ } ++ } ++} ++ ++int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id) ++{ ++ VMStateField *field = vmsd->fields; ++ int ret = 0; ++ ++ if (version_id > vmsd->version_id) { ++ g_warning("%s: incoming version_id %d is too new " ++ "for local version_id %d", ++ vmsd->name, version_id, vmsd->version_id); ++ return -EINVAL; ++ } ++ if (vmsd->pre_load) { ++ int ret = vmsd->pre_load(opaque); ++ if (ret) { ++ return ret; ++ } ++ } ++ while (field->name) { ++ if ((field->field_exists && field->field_exists(opaque, version_id)) || ++ (!field->field_exists && field->version_id <= version_id)) { ++ void *first_elem = opaque + field->offset; ++ int i, n_elems = vmstate_n_elems(opaque, field); ++ int size = vmstate_size(opaque, field); ++ ++ vmstate_handle_alloc(first_elem, field, opaque); ++ if (field->flags & VMS_POINTER) { ++ first_elem = *(void **)first_elem; ++ assert(first_elem || !n_elems || !size); ++ } ++ for (i = 0; i < n_elems; i++) { ++ void *curr_elem = first_elem + size * i; ++ ++ if (field->flags & VMS_ARRAY_OF_POINTER) { ++ curr_elem = *(void **)curr_elem; ++ } ++ if (!curr_elem && size) { ++ /* if null pointer check placeholder and do not follow */ ++ 
assert(field->flags & VMS_ARRAY_OF_POINTER); ++ ret = slirp_vmstate_info_nullptr.get(f, curr_elem, size, ++ NULL); ++ } else if (field->flags & VMS_STRUCT) { ++ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, ++ field->vmsd->version_id); ++ } else if (field->flags & VMS_VSTRUCT) { ++ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, ++ field->struct_version_id); ++ } else { ++ ret = field->info->get(f, curr_elem, size, field); ++ } ++ if (ret < 0) { ++ g_warning("Failed to load %s:%s", vmsd->name, field->name); ++ return ret; ++ } ++ } ++ } else if (field->flags & VMS_MUST_EXIST) { ++ g_warning("Input validation failed: %s/%s", vmsd->name, ++ field->name); ++ return -1; ++ } ++ field++; ++ } ++ if (vmsd->post_load) { ++ ret = vmsd->post_load(opaque, version_id); ++ } ++ return ret; ++} +diff --git a/slirp/src/vmstate.h b/slirp/src/vmstate.h +new file mode 100644 +index 0000000000..94c6a4bc7b +--- /dev/null ++++ b/slirp/src/vmstate.h +@@ -0,0 +1,391 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * QEMU migration/snapshot declarations ++ * ++ * Copyright (c) 2009-2011 Red Hat, Inc. ++ * ++ * Original author: Juan Quintela ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#ifndef VMSTATE_H_ ++#define VMSTATE_H_ ++ ++#include ++#include ++#include ++#include "slirp.h" ++#include "stream.h" ++ ++#define stringify(s) tostring(s) ++#define tostring(s) #s ++ ++typedef struct VMStateInfo VMStateInfo; ++typedef struct VMStateDescription VMStateDescription; ++typedef struct VMStateField VMStateField; ++ ++int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque); ++int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id); ++ ++/* VMStateInfo allows customized migration of objects that don't fit in ++ * any category in VMStateFlags. Additional information is always passed ++ * into get and put in terms of field and vmdesc parameters. However ++ * these two parameters should only be used in cases when customized ++ * handling is needed, such as QTAILQ. For primitive data types such as ++ * integer, field and vmdesc parameters should be ignored inside get/put. 
++ */ ++struct VMStateInfo { ++ const char *name; ++ int (*get)(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field); ++ int (*put)(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field); ++}; ++ ++enum VMStateFlags { ++ /* Ignored */ ++ VMS_SINGLE = 0x001, ++ ++ /* The struct member at opaque + VMStateField.offset is a pointer ++ * to the actual field (e.g. struct a { uint8_t *b; ++ * }). Dereference the pointer before using it as basis for ++ * further pointer arithmetic (see e.g. VMS_ARRAY). Does not ++ * affect the meaning of VMStateField.num_offset or ++ * VMStateField.size_offset; see VMS_VARRAY* and VMS_VBUFFER for ++ * those. */ ++ VMS_POINTER = 0x002, ++ ++ /* The field is an array of fixed size. VMStateField.num contains ++ * the number of entries in the array. The size of each entry is ++ * given by VMStateField.size and / or opaque + ++ * VMStateField.size_offset; see VMS_VBUFFER and ++ * VMS_MULTIPLY. Each array entry will be processed individually ++ * (VMStateField.info.get()/put() if VMS_STRUCT is not set, ++ * recursion into VMStateField.vmsd if VMS_STRUCT is set). May not ++ * be combined with VMS_VARRAY*. */ ++ VMS_ARRAY = 0x004, ++ ++ /* The field is itself a struct, containing one or more ++ * fields. Recurse into VMStateField.vmsd. Most useful in ++ * combination with VMS_ARRAY / VMS_VARRAY*, recursing into each ++ * array entry. */ ++ VMS_STRUCT = 0x008, ++ ++ /* The field is an array of variable size. The int32_t at opaque + ++ * VMStateField.num_offset contains the number of entries in the ++ * array. See the VMS_ARRAY description regarding array handling ++ * in general. May not be combined with VMS_ARRAY or any other ++ * VMS_VARRAY*. */ ++ VMS_VARRAY_INT32 = 0x010, ++ ++ /* Ignored */ ++ VMS_BUFFER = 0x020, ++ ++ /* The field is a (fixed-size or variable-size) array of pointers ++ * (e.g. struct a { uint8_t *b[]; }). Dereference each array entry ++ * before using it. 
Note: Does not imply any one of VMS_ARRAY / ++ * VMS_VARRAY*; these need to be set explicitly. */ ++ VMS_ARRAY_OF_POINTER = 0x040, ++ ++ /* The field is an array of variable size. The uint16_t at opaque ++ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT16 = 0x080, ++ ++ /* The size of the individual entries (a single array entry if ++ * VMS_ARRAY or any of VMS_VARRAY* are set, or the field itself if ++ * neither is set) is variable (i.e. not known at compile-time), ++ * but the same for all entries. Use the int32_t at opaque + ++ * VMStateField.size_offset (subject to VMS_MULTIPLY) to determine ++ * the size of each (and every) entry. */ ++ VMS_VBUFFER = 0x100, ++ ++ /* Multiply the entry size given by the int32_t at opaque + ++ * VMStateField.size_offset (see VMS_VBUFFER description) with ++ * VMStateField.size to determine the number of bytes to be ++ * allocated. Only valid in combination with VMS_VBUFFER. */ ++ VMS_MULTIPLY = 0x200, ++ ++ /* The field is an array of variable size. The uint8_t at opaque + ++ * VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT8 = 0x400, ++ ++ /* The field is an array of variable size. The uint32_t at opaque ++ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT32 = 0x800, ++ ++ /* Fail loading the serialised VM state if this field is missing ++ * from the input. 
*/ ++ VMS_MUST_EXIST = 0x1000, ++ ++ /* When loading serialised VM state, allocate memory for the ++ * (entire) field. Only valid in combination with ++ * VMS_POINTER. Note: Not all combinations with other flags are ++ * currently supported, e.g. VMS_ALLOC|VMS_ARRAY_OF_POINTER won't ++ * cause the individual entries to be allocated. */ ++ VMS_ALLOC = 0x2000, ++ ++ /* Multiply the number of entries given by the integer at opaque + ++ * VMStateField.num_offset (see VMS_VARRAY*) with VMStateField.num ++ * to determine the number of entries in the array. Only valid in ++ * combination with one of VMS_VARRAY*. */ ++ VMS_MULTIPLY_ELEMENTS = 0x4000, ++ ++ /* A structure field that is like VMS_STRUCT, but uses ++ * VMStateField.struct_version_id to tell which version of the ++ * structure we are referencing to use. */ ++ VMS_VSTRUCT = 0x8000, ++}; ++ ++struct VMStateField { ++ const char *name; ++ size_t offset; ++ size_t size; ++ size_t start; ++ int num; ++ size_t num_offset; ++ size_t size_offset; ++ const VMStateInfo *info; ++ enum VMStateFlags flags; ++ const VMStateDescription *vmsd; ++ int version_id; ++ int struct_version_id; ++ bool (*field_exists)(void *opaque, int version_id); ++}; ++ ++struct VMStateDescription { ++ const char *name; ++ int version_id; ++ int (*pre_load)(void *opaque); ++ int (*post_load)(void *opaque, int version_id); ++ int (*pre_save)(void *opaque); ++ VMStateField *fields; ++}; ++ ++ ++extern const VMStateInfo slirp_vmstate_info_int16; ++extern const VMStateInfo slirp_vmstate_info_int32; ++extern const VMStateInfo slirp_vmstate_info_uint8; ++extern const VMStateInfo slirp_vmstate_info_uint16; ++extern const VMStateInfo slirp_vmstate_info_uint32; ++ ++/** Put this in the stream when migrating a null pointer.*/ ++#define VMS_NULLPTR_MARKER (0x30U) /* '0' */ ++extern const VMStateInfo slirp_vmstate_info_nullptr; ++ ++extern const VMStateInfo slirp_vmstate_info_buffer; ++extern const VMStateInfo slirp_vmstate_info_tmp; ++ ++#define 
type_check_array(t1, t2, n) ((t1(*)[n])0 - (t2 *)0) ++#define type_check_pointer(t1, t2) ((t1 **)0 - (t2 *)0) ++#define typeof_field(type, field) typeof(((type *)0)->field) ++#define type_check(t1, t2) ((t1 *)0 - (t2 *)0) ++ ++#define vmstate_offset_value(_state, _field, _type) \ ++ (offsetof(_state, _field) + type_check(_type, typeof_field(_state, _field))) ++ ++#define vmstate_offset_pointer(_state, _field, _type) \ ++ (offsetof(_state, _field) + \ ++ type_check_pointer(_type, typeof_field(_state, _field))) ++ ++#define vmstate_offset_array(_state, _field, _type, _num) \ ++ (offsetof(_state, _field) + \ ++ type_check_array(_type, typeof_field(_state, _field), _num)) ++ ++#define vmstate_offset_buffer(_state, _field) \ ++ vmstate_offset_array(_state, _field, uint8_t, \ ++ sizeof(typeof_field(_state, _field))) ++ ++/* In the macros below, if there is a _version, that means the macro's ++ * field will be processed only if the version being received is >= ++ * the _version specified. In general, if you add a new field, you ++ * would increment the structure's version and put that version ++ * number into the new field so it would only be processed with the ++ * new version. ++ * ++ * In particular, for VMSTATE_STRUCT() and friends the _version does ++ * *NOT* pick the version of the sub-structure. It works just as ++ * specified above. The version of the top-level structure received ++ * is passed down to all sub-structures. This means that the ++ * sub-structures must have version that are compatible with all the ++ * structures that use them. ++ * ++ * If you want to specify the version of the sub-structure, use ++ * VMSTATE_VSTRUCT(), which allows the specific sub-structure version ++ * to be directly specified. 
++ */ ++ ++#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .size = sizeof(_type), .info = &(_info), \ ++ .flags = VMS_SINGLE, \ ++ .offset = vmstate_offset_value(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), .num = (_num), \ ++ .info = &(_info), .size = sizeof(_type), .flags = VMS_ARRAY, \ ++ .offset = vmstate_offset_array(_state, _field, _type, _num), \ ++ } ++ ++#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .vmsd = &(_vmsd), .size = sizeof(_type), \ ++ .flags = VMS_STRUCT, \ ++ .offset = vmstate_offset_value(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_STRUCT_POINTER_V(_field, _state, _version, _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .vmsd = &(_vmsd), .size = sizeof(_type *), \ ++ .flags = VMS_STRUCT | VMS_POINTER, \ ++ .offset = vmstate_offset_pointer(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, \ ++ _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .num = (_num), .field_exists = (_test), \ ++ .version_id = (_version), .vmsd = &(_vmsd), .size = sizeof(_type), \ ++ .flags = VMS_STRUCT | VMS_ARRAY, \ ++ .offset = vmstate_offset_array(_state, _field, _type, _num), \ ++ } ++ ++#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .size = (_size - _start), \ ++ .info = &slirp_vmstate_info_buffer, .flags = VMS_BUFFER, \ ++ .offset = vmstate_offset_buffer(_state, _field) + _start, \ ++ } ++ ++#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, 
_field_size) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), \ ++ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t), \ ++ .info = &slirp_vmstate_info_buffer, \ ++ .flags = VMS_VBUFFER | VMS_POINTER, \ ++ .offset = offsetof(_state, _field), \ ++ } ++ ++#define QEMU_BUILD_BUG_ON_STRUCT(x) \ ++ struct { \ ++ int : (x) ? -1 : 1; \ ++ } ++ ++#define QEMU_BUILD_BUG_ON_ZERO(x) \ ++ (sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)) - sizeof(QEMU_BUILD_BUG_ON_STRUCT(x))) ++ ++/* Allocate a temporary of type 'tmp_type', set tmp->parent to _state ++ * and execute the vmsd on the temporary. Note that we're working with ++ * the whole of _state here, not a field within it. ++ * We compile time check that: ++ * That _tmp_type contains a 'parent' member that's a pointer to the ++ * '_state' type ++ * That the pointer is right at the start of _tmp_type. ++ */ ++#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) \ ++ { \ ++ .name = "tmp", \ ++ .size = sizeof(_tmp_type) + \ ++ QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \ ++ type_check_pointer(_state, typeof_field(_tmp_type, parent)), \ ++ .vmsd = &(_vmsd), .info = &slirp_vmstate_info_tmp, \ ++ } ++ ++#define VMSTATE_SINGLE(_field, _state, _version, _info, _type) \ ++ VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type) ++ ++#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type) \ ++ VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type) ++ ++#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type) \ ++ VMSTATE_STRUCT_POINTER_V(_field, _state, 0, _vmsd, _type) ++ ++#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \ ++ VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version, _vmsd, \ ++ _type) ++ ++#define VMSTATE_INT16_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int16, int16_t) ++#define VMSTATE_INT32_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int32, 
int32_t) ++ ++#define VMSTATE_UINT8_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint8, uint8_t) ++#define VMSTATE_UINT16_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint16, uint16_t) ++#define VMSTATE_UINT32_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint32, uint32_t) ++ ++#define VMSTATE_INT16(_f, _s) VMSTATE_INT16_V(_f, _s, 0) ++#define VMSTATE_INT32(_f, _s) VMSTATE_INT32_V(_f, _s, 0) ++ ++#define VMSTATE_UINT8(_f, _s) VMSTATE_UINT8_V(_f, _s, 0) ++#define VMSTATE_UINT16(_f, _s) VMSTATE_UINT16_V(_f, _s, 0) ++#define VMSTATE_UINT32(_f, _s) VMSTATE_UINT32_V(_f, _s, 0) ++ ++#define VMSTATE_UINT16_TEST(_f, _s, _t) \ ++ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint16, uint16_t) ++ ++#define VMSTATE_UINT32_TEST(_f, _s, _t) \ ++ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint32, uint32_t) ++ ++#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \ ++ VMSTATE_ARRAY(_f, _s, _n, _v, slirp_vmstate_info_int16, int16_t) ++ ++#define VMSTATE_INT16_ARRAY(_f, _s, _n) VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0) ++ ++#define VMSTATE_BUFFER_V(_f, _s, _v) \ ++ VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f))) ++ ++#define VMSTATE_BUFFER(_f, _s) VMSTATE_BUFFER_V(_f, _s, 0) ++ ++#define VMSTATE_END_OF_LIST() \ ++ { \ ++ } ++ ++#endif +-- +2.27.0 + diff --git a/0004-Initial-redhat-build.patch b/0004-Initial-redhat-build.patch new file mode 100644 index 0000000..94cf91c --- /dev/null +++ b/0004-Initial-redhat-build.patch @@ -0,0 +1,313 @@ +From fc113ecd7c99646a7ced0b99570b5927ae6d595f Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 26 May 2021 10:56:02 +0200 +Subject: Initial redhat build + +This patch introduces redhat build structure in redhat subdirectory. 
In addition, +several issues are fixed in QEMU tree: + +- Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm + - As we use qemu-kvm as name in all places, this is updated to be consistent +- Man page renamed from qemu to qemu-kvm + - man page is installed using make install so we have to fix it in qemu tree + +We disable make check due to issues with some of the tests. + +This rebase is based on qemu-kvm-6.2.0-13.el9 + +Signed-off-by: Miroslav Rezanina +-- +Rebase changes (6.1.0): +- Move build to .distro +- Move changes for support file to related commit +- Added dependency for python3-sphinx-rtd_theme +- Removed --disable-sheepdog configure option +- Added new hw-display modules +- SASL initialization moved to ui/vnc-auth-sasl.c +- Add accel-qtest- and accel-tcg-x86_64 libraries +- Added hw-usb-host module +- Disable new configure options (bpf, nvmm, slirp-smbd) +- Use -pie for ksmctl build (annocheck complain fix) + +Rebase changes (6.2.0): +- removed --disable-jemalloc and --disable-tcmalloc configure options +- added audio-oss.so +- added fdt requirement for x86_64 +- tests/acceptance renamed to tests/avocado +- added multiboot_dma.bin +- Add -Wno-string-plus-int to extra flags +- Updated configure options + +Rebase changes (7.0.0): +- Do not use -mlittle CFLAG on ppc64le +- Used upstream handling issue with ui/clipboard.c +- Use -mlittle-endian on ppc64le instead of deleting it in configure +- Drop --disable-libxml2 option for configure (upstream) +- Remove vof roms +- Disable AVX2 support +- Use internal meson +- Disable new configure options (dbus-display and qga-vss) +- Change permissions on installing tests/Makefile.include +- Remove ssh block driver + +Merged patches (6.0.0): + - 605758c902 Limit build on Power to qemu-img and qemu-ga only + +Merged patches (6.1.0): +- f04f91751f Use cached tarballs +- 6581165c65 Remove message with running VM count +- 03c3cac9fc spec-file: build qemu-kvm without SPICE and QXL +- e0ae6c1f6c
spec-file: Obsolete qemu-kvm-ui-spice +- 9d2e9f9ecf spec: Do not build qemu-kvm-block-gluster +- cf470b4234 spec: Do not link pcnet and ne2k_pci roms +- e981284a6b redhat: Install the s390-netboot.img that we've built +- 24ef557f33 spec: Remove usage of Group: tag +- c40d69b4f4 spec: Drop %defattr usage +- f8e98798ce spec: Clean up BuildRequires +- 47246b43ee spec: Remove iasl BuildRequires +- 170dc1cbe0 spec: Remove redundant 0 in conditionals +- 8718f6fa11 spec: Add more have_XXX conditionals +- a001269ce9 spec: Remove binutils versioned Requires +- 34545ee641 spec: Remove diffutils BuildRequires +- c2c82beac9 spec: Remove redundant Requires: +- 9314c231f4 spec: Add XXX_version macros +- c43db0bf0f spec: Add have_block_rbd +- 3ecb0c0319 qga: drop StandardError=syslog +- 018049dc80 Remove iscsi support +- a2edf18777 redhat: Replace the kvm-setup.service with a /etc/modules-load.d config file +- 387b5fbcfe redhat: Move qemu-kvm-docs dependency to qemu-kvm +- 4ead693178 redhat: introducting qemu-kvm-hw-usbredir +- 4dc6fc3035 redhat: use the standard vhost-user JSON path +- 84757178b4 Fix local build +- 8c394227dd spec: Restrict block drivers in tools +- b6aa7c1fae Move tools to separate package +- eafd82e509 Split qemu-pr-helper to separate package +- 2c0182e2aa spec: RPM_BUILD_ROOT -> %{buildroot} +- 91bd55ca13 spec: More use of %{name} instead of 'qemu-kvm' +- 50ba299c61 spec: Use qemu-pr-helper.service from qemu.git (partial) +- ee08d4e0a3 spec: Use %{_sourcedir} for referencing sources +- 039e7f7d02 spec: Add tools_only +- 884ba71617 spec: %build: Add run_configure helper +- 8ebd864d65 spec: %build: Disable more bits with %{disable_everything} (partial) +- f23fdb53f5 spec: %build: Add macros for some 'configure' parameters +- fe951a8bd8 spec: %files: Move qemu-guest-agent and qemu-img earlier +- 353b632e37 spec: %install: Remove redundant bits +- 9d2015b752 spec: %install: Add %{modprobe_kvm_conf} macro +- 6d05134e8c spec: %install: Remove qemu-guest-agent 
/etc/qemu-kvm usage +- 985b226467 spec: %install: clean up qemu-ga section +- dfaf9c600d spec: %install: Use a single %{tools_only} section +- f6978ddb46 spec: Make tools_only not cross spec sections +- 071c211098 spec: %install: Limit time spent in %{qemu_kvm_build} +- 1b65c674be spec: misc syntactic merges with Fedora +- 4da16294cf spec: Use Fedora's pattern for specifying rc version +- d7ee259a79 spec: %files: don't use fine grained -docs file list +- 64cad0c60f spec: %files: Add licenses to qemu-common too +- c3de4f080a spec: %install: Drop python3 shebang fixup +- 46fc216115 Update local build to work with spec file improvements +- bab9531548 spec: Remove buildldflags +- c8360ab6a9 spec: Use %make_build macro +- f6966c66e9 spec: Drop make install sharedir and datadir usage +- 86982421bc spec: use %make_install macro +- 191c405d22 spec: parallelize `make check` +- 251a1fb958 spec: Drop explicit --build-id +- 44c7dda6c3 spec: use %{build_ldflags} +- 0009a34354 Move virtiofsd to separate package +- 34d1b200b3 Utilize --firmware configure option +- 2800e1dd03 spec: Switch toolchain to Clang/LLVM (except process-patches.sh) +- e8a70f500f spec: Use safe-stack for x86_64 +- e29445d50d spec: Reenable write support for VMDK etc. 
in tools +- a4fe2a3e16 redhat: Disable LTO on non-x86 architectures + +Merged patches (6.2.0): +- 333452440b remove sgabios dependency +- 7d3633f184 enable pulseaudio +- bd898709b0 spec: disable use of gcrypt for crypto backends in favour of gnutls +- e4f0c6dee6 spec: Remove block-curl and block-ssh dependency +- 4dc13bfe63 spec: Build the VDI block driver +- d2f2ff3c74 spec: Explicitly include compress filter +- a7d047f9c2 Move ksmtuned files to separate package + +Merged patches (7.0.0): +- 098d4d08d0 spec: Rename qemu-kvm-hw-usbredir to qemu-kvm-device-usb-redirect +- c2bd0d6834 spec: Split qemu-kvm-ui-opengl +- 2c9cda805d spec: Introduce packages for virtio-gpu-* modules (changed as rhel device tree not set) +- d0414a3e0b spec: Introduce device-display-virtio-vga* packages +- 3534ec46d4 spec: Move usb-host module to separate package +- ddc14d4737 spec: Move qtest accel module to tests package +- 6f2c4befa6 spec: Extend qemu-kvm-core description +- 6f11866e4e (rhel/rhel-9.0.0) Update to qemu-kvm-6.2.0-6.el9 +- da0a28758f ui/clipboard: fix use-after-free regression +- 895d4d52eb spec: Remove qemu-virtiofsd +- c8c8c8bd84 spec: Fix obsolete for spice subpackages +- d46d2710b2 spec: Obsolete old usb redir subpackage +- 6f52a50b68 spec: Obsolete ssh driver + +Signed-off-by: Miroslav Rezanina +--- + .distro/85-kvm.preset | 5 - + .distro/Makefile | 100 + + .distro/Makefile.common | 40 + + .distro/README.tests | 39 + + .distro/ksm.service | 13 - + .distro/ksm.sysconfig | 4 - + .distro/ksmctl.c | 77 - + .distro/ksmtuned | 139 - + .distro/ksmtuned.conf | 21 - + .distro/ksmtuned.service | 12 - + .distro/kvm-setup | 49 - + .distro/kvm-setup.service | 14 - + .distro/modules-load.conf | 4 + + .distro/qemu-guest-agent.service | 1 - + .distro/qemu-kvm.spec.template | 4034 +++++++++++++++++++++++ + .distro/rpminspect.yaml | 6 +- + .distro/scripts/extract_build_cmd.py | 12 + + .gitignore | 1 + + README.systemtap | 43 + + meson.build | 4 +- + scripts/qemu-guest-agent/fsfreeze-hook 
| 2 +- + scripts/systemtap/conf.d/qemu_kvm.conf | 4 + + scripts/systemtap/script.d/qemu_kvm.stp | 1 + + tests/check-block.sh | 2 + + ui/vnc-auth-sasl.c | 2 +- + 25 files changed, 4290 insertions(+), 339 deletions(-) + delete mode 100644 .distro/85-kvm.preset + create mode 100644 .distro/Makefile + create mode 100644 .distro/Makefile.common + create mode 100644 .distro/README.tests + delete mode 100644 .distro/ksm.service + delete mode 100644 .distro/ksm.sysconfig + delete mode 100644 .distro/ksmctl.c + delete mode 100644 .distro/ksmtuned + delete mode 100644 .distro/ksmtuned.conf + delete mode 100644 .distro/ksmtuned.service + delete mode 100644 .distro/kvm-setup + delete mode 100644 .distro/kvm-setup.service + create mode 100644 .distro/modules-load.conf + create mode 100644 .distro/qemu-kvm.spec.template + create mode 100644 README.systemtap + create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf + create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp + +diff --git a/README.systemtap b/README.systemtap +new file mode 100644 +index 0000000000..ad913fc990 +--- /dev/null ++++ b/README.systemtap +@@ -0,0 +1,43 @@ ++QEMU tracing using systemtap-initscript ++--------------------------------------- ++ ++You can capture QEMU trace data all the time using systemtap-initscript. This ++uses SystemTap's flight recorder mode to trace all running guests to a ++fixed-size buffer on the host. Old trace entries are overwritten by new ++entries when the buffer size wraps. ++ ++1. Install the systemtap-initscript package: ++ # yum install systemtap-initscript ++ ++2. Install the systemtap scripts and the conf file: ++ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/ ++ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/ ++ ++The set of trace events to enable is given in qemu_kvm.stp. 
This SystemTap ++script can be customized to add or remove trace events provided in ++/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp. ++ ++SystemTap customizations can be made to qemu_kvm.conf to control the flight ++recorder buffer size and whether to store traces in memory only or disk too. ++See stap(1) for option documentation. ++ ++3. Start the systemtap service. ++ # service systemtap start qemu_kvm ++ ++4. Make the service start at boot time. ++ # chkconfig systemtap on ++ ++5. Confirm that the service works. ++ # service systemtap status qemu_kvm ++ qemu_kvm is running... ++ ++When you want to inspect the trace buffer, perform the following steps: ++ ++1. Dump the trace buffer. ++ # staprun -A qemu_kvm >/tmp/trace.log ++ ++2. Start the systemtap service because the preceding step stops the service. ++ # service systemtap start qemu_kvm ++ ++3. Translate the trace record to readable format. ++ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log +diff --git a/meson.build b/meson.build +index 861de93c4f..6f7e430f0f 100644 +--- a/meson.build ++++ b/meson.build +@@ -2394,7 +2394,9 @@ if capstone_opt == 'internal' + # Include all configuration defines via a header file, which will wind up + # as a dependency on the object file, and thus changes here will result + # in a rebuild. +- '-include', 'capstone-defs.h' ++ '-include', 'capstone-defs.h', ++ ++ '-Wp,-D_GLIBCXX_ASSERTIONS', + ] + + libcapstone = static_library('capstone', +diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook +index 13aafd4845..e9b84ec028 100755 +--- a/scripts/qemu-guest-agent/fsfreeze-hook ++++ b/scripts/qemu-guest-agent/fsfreeze-hook +@@ -8,7 +8,7 @@ + # request, it is issued with "thaw" argument after filesystem is thawed. 
+ + LOGFILE=/var/log/qga-fsfreeze-hook.log +-FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d ++FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d + + # Check whether file $1 is a backup or rpm-generated file and should be ignored + is_ignored_file() { +diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf +new file mode 100644 +index 0000000000..372d8160a4 +--- /dev/null ++++ b/scripts/systemtap/conf.d/qemu_kvm.conf +@@ -0,0 +1,4 @@ ++# Force load uprobes (see BZ#1118352) ++stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true ++ ++qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes +diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp +new file mode 100644 +index 0000000000..c04abf9449 +--- /dev/null ++++ b/scripts/systemtap/script.d/qemu_kvm.stp +@@ -0,0 +1 @@ ++probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} +diff --git a/tests/check-block.sh b/tests/check-block.sh +index f59496396c..d900d8b35e 100755 +--- a/tests/check-block.sh ++++ b/tests/check-block.sh +@@ -48,6 +48,8 @@ if LANG=C bash --version | grep -q 'GNU bash, version [123]' ; then + skip "bash version too old ==> Not running the qemu-iotests." 
+ fi + ++exit 0 ++ + cd tests/qemu-iotests + + # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests +diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c +index 47fdae5b21..2a950caa2a 100644 +--- a/ui/vnc-auth-sasl.c ++++ b/ui/vnc-auth-sasl.c +@@ -42,7 +42,7 @@ + + bool vnc_sasl_server_init(Error **errp) + { +- int saslErr = sasl_server_init(NULL, "qemu"); ++ int saslErr = sasl_server_init(NULL, "qemu-kvm"); + + if (saslErr != SASL_OK) { + error_setg(errp, "Failed to initialize SASL auth: %s", +-- +2.31.1 + diff --git a/0005-Enable-disable-devices-for-RHEL.patch b/0005-Enable-disable-devices-for-RHEL.patch new file mode 100644 index 0000000..1ffbe97 --- /dev/null +++ b/0005-Enable-disable-devices-for-RHEL.patch @@ -0,0 +1,642 @@ +From 51ec7495d69fe4b4d0b61642ca6c0e7fd7a1032d Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 15 Jul 2021 03:22:36 -0400 +Subject: Enable/disable devices for RHEL + +This commit adds all changes related to changes in supported devices. 
+ +Signed-off-by: Miroslav Rezanina +-- +Rebase notes (6.1.0): +- Added CONFIG_TPM (except s390x) +- default-configs moved to configs +- Use --with-device- configure option to use rhel configs + +Rebase notes (6.2.0): +- Add CONFIG_ISA_FDC +- Do not remove -no-hpet documentation + +Rebase notes (7.0.0): +- Added CONFIG_ARM_GIC_TCG option for aarch64 +- Fixes necessary for layout change fixes +- Renamed CONFIG_ARM_GIC_TCG to CONFIG_ARM_GICV3_TCG +- Removed upstream devices + +Merged patches (6.1.0): +- c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak +- 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI +- f2fe835153 aarch64-rh-devices: add CONFIG_PXB +- b5431733ad disable CONFIG_USB_STORAGE_BOT +- 478ba0cdf6 Disable TPM passthrough +- 2504d68a7c aarch64: Add USB storage devices +- 51c2a3253c disable ac97 audio + +Merged patches (6.2.0): +- 9f2f9fa2ba disable sga device + +Merged patches (7.0.0): +- fd7c45a5a8 redhat: Enable virtio-mem as tech-preview on x86-64 +- c9e68ea451 Enable SGX -- RH Only +--- + .distro/qemu-kvm.spec.template | 18 +-- + .../aarch64-softmmu/aarch64-rh-devices.mak | 34 ++++++ + .../ppc64-softmmu/ppc64-rh-devices.mak | 35 ++++++ + configs/devices/rh-virtio.mak | 10 ++ + .../s390x-softmmu/s390x-rh-devices.mak | 15 +++ + .../x86_64-softmmu/x86_64-rh-devices.mak | 103 ++++++++++++++++++ + hw/acpi/ich9.c | 4 +- + hw/arm/meson.build | 2 +- + hw/block/fdc.c | 10 ++ + hw/cpu/meson.build | 5 +- + hw/display/cirrus_vga.c | 5 +- + hw/ide/piix.c | 5 +- + hw/input/pckbd.c | 2 + + hw/net/e1000.c | 2 + + hw/ppc/spapr_cpu_core.c | 2 + + hw/usb/meson.build | 2 +- + target/arm/cpu_tcg.c | 10 ++ + target/ppc/cpu-models.c | 9 ++ + target/s390x/cpu_models_sysemu.c | 3 + + target/s390x/kvm/kvm.c | 8 ++ + 20 files changed, 269 insertions(+), 15 deletions(-) + create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak + create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak + create mode 100644 configs/devices/rh-virtio.mak 
+ create mode 100644 configs/devices/s390x-softmmu/s390x-rh-devices.mak + create mode 100644 configs/devices/x86_64-softmmu/x86_64-rh-devices.mak + +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +new file mode 100644 +index 0000000000..5f6ee1de5b +--- /dev/null ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -0,0 +1,34 @@ ++include ../rh-virtio.mak ++ ++CONFIG_ARM_GIC_KVM=y ++CONFIG_ARM_GICV3_TCG=y ++CONFIG_ARM_GIC=y ++CONFIG_ARM_SMMUV3=y ++CONFIG_ARM_V7M=y ++CONFIG_ARM_VIRT=y ++CONFIG_EDID=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_SCSI=y ++CONFIG_SEMIHOSTING=y ++CONFIG_USB=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VIRTIO_MMIO=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_XIO3130=y ++CONFIG_NVDIMM=y ++CONFIG_ACPI_APEI=y ++CONFIG_TPM=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_TIS_SYSBUS=y ++CONFIG_PTIMER=y ++CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y ++CONFIG_PVPANIC_PCI=y ++CONFIG_PXB=y +diff --git a/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak +new file mode 100644 +index 0000000000..6a3e3f0227 +--- /dev/null ++++ b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak +@@ -0,0 +1,35 @@ ++include ../rh-virtio.mak ++ ++CONFIG_DIMM=y ++CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y ++CONFIG_PCI=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PCI_EXPRESS=y ++CONFIG_PSERIES=y ++CONFIG_SCSI=y ++CONFIG_SPAPR_VSCSI=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_OHCI=y ++CONFIG_USB_OHCI_PCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_PCI=y 
++CONFIG_VIRTIO_VGA=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_XICS=y ++CONFIG_XIVE=y ++CONFIG_TPM=y ++CONFIG_TPM_SPAPR=y ++CONFIG_TPM_EMULATOR=y +diff --git a/configs/devices/rh-virtio.mak b/configs/devices/rh-virtio.mak +new file mode 100644 +index 0000000000..94ede1b5f6 +--- /dev/null ++++ b/configs/devices/rh-virtio.mak +@@ -0,0 +1,10 @@ ++CONFIG_VIRTIO=y ++CONFIG_VIRTIO_BALLOON=y ++CONFIG_VIRTIO_BLK=y ++CONFIG_VIRTIO_GPU=y ++CONFIG_VIRTIO_INPUT=y ++CONFIG_VIRTIO_INPUT_HOST=y ++CONFIG_VIRTIO_NET=y ++CONFIG_VIRTIO_RNG=y ++CONFIG_VIRTIO_SCSI=y ++CONFIG_VIRTIO_SERIAL=y +diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +new file mode 100644 +index 0000000000..d3b38312e1 +--- /dev/null ++++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +@@ -0,0 +1,15 @@ ++include ../rh-virtio.mak ++ ++CONFIG_PCI=y ++CONFIG_S390_CCW_VIRTIO=y ++CONFIG_S390_FLIC=y ++CONFIG_S390_FLIC_KVM=y ++CONFIG_SCLPCONSOLE=y ++CONFIG_SCSI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_AP=y ++CONFIG_VFIO_CCW=y ++CONFIG_VFIO_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_CCW=y ++CONFIG_WDT_DIAG288=y +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +new file mode 100644 +index 0000000000..d0c9e66641 +--- /dev/null ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -0,0 +1,103 @@ ++include ../rh-virtio.mak ++ ++CONFIG_ACPI=y ++CONFIG_ACPI_PCI=y ++CONFIG_ACPI_CPU_HOTPLUG=y ++CONFIG_ACPI_MEMORY_HOTPLUG=y ++CONFIG_ACPI_NVDIMM=y ++CONFIG_ACPI_SMBUS=y ++CONFIG_ACPI_VMGENID=y ++CONFIG_ACPI_X86=y ++CONFIG_ACPI_X86_ICH=y ++CONFIG_AHCI=y ++CONFIG_APIC=y ++CONFIG_APM=y ++CONFIG_BOCHS_DISPLAY=y ++CONFIG_DIMM=y ++CONFIG_E1000E_PCI_EXPRESS=y ++CONFIG_E1000_PCI=y ++CONFIG_EDU=y ++CONFIG_FDC=y ++CONFIG_FDC_SYSBUS=y ++CONFIG_FDC_ISA=y ++CONFIG_FW_CFG_DMA=y ++CONFIG_HDA=y ++CONFIG_HYPERV=y ++CONFIG_HYPERV_TESTDEV=y ++CONFIG_I2C=y ++CONFIG_I440FX=y ++CONFIG_I8254=y ++CONFIG_I8257=y 
++CONFIG_I8259=y ++CONFIG_I82801B11=y ++CONFIG_IDE_CORE=y ++CONFIG_IDE_PCI=y ++CONFIG_IDE_PIIX=y ++CONFIG_IDE_QDEV=y ++CONFIG_IOAPIC=y ++CONFIG_IOH3420=y ++CONFIG_ISA_BUS=y ++CONFIG_ISA_DEBUG=y ++CONFIG_ISA_TESTDEV=y ++CONFIG_LPC_ICH9=y ++CONFIG_MC146818RTC=y ++CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y ++CONFIG_OPENGL=y ++CONFIG_PAM=y ++CONFIG_PC=y ++CONFIG_PCI=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_EXPRESS=y ++CONFIG_PCI_EXPRESS_Q35=y ++CONFIG_PCI_I440FX=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PCKBD=y ++CONFIG_PCSPK=y ++CONFIG_PC_ACPI=y ++CONFIG_PC_PCI=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_PVPANIC_ISA=y ++CONFIG_PXB=y ++CONFIG_Q35=y ++CONFIG_RTL8139_PCI=y ++CONFIG_SCSI=y ++CONFIG_SERIAL=y ++CONFIG_SERIAL_ISA=y ++CONFIG_SERIAL_PCI=y ++CONFIG_SEV=y ++CONFIG_SMBIOS=y ++CONFIG_SMBUS_EEPROM=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_EHCI=y ++CONFIG_USB_EHCI_PCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_USB_UHCI=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_CIRRUS=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VHOST_USER_BLK=y ++CONFIG_VIRTIO_MEM=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_VGA=y ++CONFIG_VMMOUSE=y ++CONFIG_VMPORT=y ++CONFIG_VTD=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_WDT_IB700=y ++CONFIG_XIO3130=y ++CONFIG_TPM=y ++CONFIG_TPM_CRB=y ++CONFIG_TPM_TIS_ISA=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_SGX=y +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index bd9bbade70..de1e401cdf 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -435,8 +435,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; + pm->acpi_memory_hotplug.is_enabled = true; + pm->cpu_hotplug_legacy = true; +- pm->disable_s3 = 0; +- pm->disable_s4 = 0; ++ pm->disable_s3 = 1; ++ pm->disable_s4 = 1; + pm->s4_val = 2; + pm->use_acpi_hotplug_bridge = true; + pm->keep_pci_slot_hpc = 
true; +diff --git a/hw/arm/meson.build b/hw/arm/meson.build +index 721a8eb8be..87ed4dd914 100644 +--- a/hw/arm/meson.build ++++ b/hw/arm/meson.build +@@ -31,7 +31,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) + arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) + arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) + +-arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) ++#arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) + arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c')) + arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) + arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 347875a0cd..ca1776121f 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -49,6 +49,8 @@ + #include "qom/object.h" + #include "fdc-internal.h" + ++#include "hw/boards.h" ++ + /********************************************************/ + /* debug Floppy devices */ + +@@ -2338,6 +2340,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) + FDrive *drive; + static int command_tables_inited = 0; + ++ /* Restricted for Red Hat Enterprise Linux: */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (!strstr(mc->name, "-rhel7.")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { + error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); + return; +diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build +index 9e52fee9e7..bb71c9f3e7 100644 +--- a/hw/cpu/meson.build ++++ b/hw/cpu/meson.build +@@ -1,6 +1,7 @@ +-softmmu_ss.add(files('core.c', 'cluster.c')) ++#softmmu_ss.add(files('core.c', 'cluster.c')) ++softmmu_ss.add(files('core.c')) + + specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: 
files('arm11mpcore.c')) + specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) + specific_ss.add(when: 'CONFIG_A9MPCORE', if_true: files('a9mpcore.c')) +-specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) ++#specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) +diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c +index 3bb6a58698..6447fdb02e 100644 +--- a/hw/display/cirrus_vga.c ++++ b/hw/display/cirrus_vga.c +@@ -2945,7 +2945,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + int16_t device_id = pc->device_id; + +- /* ++ warn_report("'cirrus-vga' is deprecated, " ++ "please use a different VGA card instead"); ++ ++ /* + * Follow real hardware, cirrus card emulated has 4 MB video memory. + * Also accept 8 MB/16 MB for backward compatibility. + */ +diff --git a/hw/ide/piix.c b/hw/ide/piix.c +index ce89fd0aa3..fbcf802b13 100644 +--- a/hw/ide/piix.c ++++ b/hw/ide/piix.c +@@ -232,7 +232,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) + k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +- dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix3_ide_info = { +@@ -261,6 +262,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix4_ide_info = { +diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c +index 4efdf75620..5143ebaa27 100644 +--- a/hw/input/pckbd.c ++++ b/hw/input/pckbd.c +@@ -814,6 +814,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) + dc->vmsd = &vmstate_kbd_isa; + 
isa->build_aml = i8042_build_aml; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo i8042_info = { +diff --git a/hw/net/e1000.c b/hw/net/e1000.c +index f5bc81296d..282d01e374 100644 +--- a/hw/net/e1000.c ++++ b/hw/net/e1000.c +@@ -1821,6 +1821,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux 7 */ + { + .name = "e1000-82544gc", + .device_id = E1000_DEV_ID_82544GC_COPPER, +@@ -1833,6 +1834,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#endif + }; + + static void e1000_register_types(void) +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index 8a4861f45a..fcb5dfe792 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -379,10 +379,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { + .instance_size = sizeof(SpaprCpuCore), + .class_size = sizeof(SpaprCpuCoreClass), + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"), ++#endif + DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), +diff --git a/hw/usb/meson.build b/hw/usb/meson.build +index de853d780d..0776ae6a20 100644 +--- a/hw/usb/meson.build ++++ b/hw/usb/meson.build +@@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade + if cacard.found() + usbsmartcard_ss = ss.source_set() + usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', +- if_true: [cacard, files('ccid-card-emulated.c', 'ccid-card-passthru.c')]) ++ if_true: [cacard, files('ccid-card-passthru.c')]) + hw_usb_modules += 
{'smartcard': usbsmartcard_ss} + endif + +diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c +index 13d0e9b195..3826fa5122 100644 +--- a/target/arm/cpu_tcg.c ++++ b/target/arm/cpu_tcg.c +@@ -22,6 +22,7 @@ + /* CPU models. These are not needed for the AArch64 linux-user build. */ + #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) + static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + { +@@ -375,6 +376,7 @@ static void cortex_a9_initfn(Object *obj) + cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */ + define_arm_cp_regs(cpu, cortexa9_cp_reginfo); + } ++#endif /* disabled for RHEL */ + + #ifndef CONFIG_USER_ONLY + static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) +@@ -400,6 +402,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { + REGINFO_SENTINEL + }; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_a7_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -445,6 +448,7 @@ static void cortex_a7_initfn(Object *obj) + cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ + } ++#endif /* disabled for RHEL */ + + static void cortex_a15_initfn(Object *obj) + { +@@ -488,6 +492,7 @@ static void cortex_a15_initfn(Object *obj) + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_m0_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -928,6 +933,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) + + cc->gdb_core_xml_file = "arm-m-profile.xml"; + } ++#endif /* disabled for RHEL */ + + #ifndef TARGET_AARCH64 + /* +@@ -1007,6 +1013,7 @@ static void arm_max_initfn(Object *obj) + #endif /* !TARGET_AARCH64 */ + + static const ARMCPUInfo arm_tcg_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { 
.name = "arm926", .initfn = arm926_initfn }, + { .name = "arm946", .initfn = arm946_initfn }, + { .name = "arm1026", .initfn = arm1026_initfn }, +@@ -1022,7 +1029,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "cortex-a7", .initfn = cortex_a7_initfn }, + { .name = "cortex-a8", .initfn = cortex_a8_initfn }, + { .name = "cortex-a9", .initfn = cortex_a9_initfn }, ++#endif /* disabled for RHEL */ + { .name = "cortex-a15", .initfn = cortex_a15_initfn }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-m0", .initfn = cortex_m0_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m3", .initfn = cortex_m3_initfn, +@@ -1053,6 +1062,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, ++#endif /* disabled for RHEL */ + #ifndef TARGET_AARCH64 + { .name = "max", .initfn = arm_max_initfn }, + #endif +diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c +index 976be5e0d1..dd78883410 100644 +--- a/target/ppc/cpu-models.c ++++ b/target/ppc/cpu-models.c +@@ -66,6 +66,7 @@ + #define POWERPC_DEF(_name, _pvr, _type, _desc) \ + POWERPC_DEF_SVR(_name, _desc, _pvr, POWERPC_SVR_NONE, _type) + ++#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ + /* Embedded PowerPC */ + /* PowerPC 405 family */ + /* PowerPC 405 cores */ +@@ -698,8 +699,10 @@ + "PowerPC 7447A v1.2 (G4)") + POWERPC_DEF("7457a_v1.2", CPU_POWERPC_74x7A_v12, 7455, + "PowerPC 7457A v1.2 (G4)") ++#endif + /* 64 bits PowerPC */ + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970, + "PowerPC 970 v2.2") + POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, +@@ -718,6 +721,7 @@ + "PowerPC 970MP v1.1") + POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, + "POWER5+ v2.1") ++#endif + 
POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, + "POWER7 v2.3") + POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, +@@ -897,12 +901,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "7447a", "7447a_v1.2" }, + { "7457a", "7457a_v1.2" }, + { "apollo7pm", "7457a_v1.0" }, ++#endif + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "970", "970_v2.2" }, + { "970fx", "970fx_v3.1" }, + { "970mp", "970mp_v1.1" }, + { "power5+", "power5+_v2.1" }, + { "power5gs", "power5+_v2.1" }, ++#endif + { "power7", "power7_v2.3" }, + { "power7+", "power7+_v2.1" }, + { "power8e", "power8e_v2.1" }, +@@ -912,6 +919,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "power10", "power10_v2.0" }, + #endif + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* Generic PowerPCs */ + #if defined(TARGET_PPC64) + { "ppc64", "970fx_v3.1" }, +@@ -919,5 +927,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "ppc32", "604" }, + { "ppc", "604" }, + { "default", "604" }, ++#endif + { NULL, NULL } + }; +diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c +index 05c3ccaaff..6a04ccab1b 100644 +--- a/target/s390x/cpu_models_sysemu.c ++++ b/target/s390x/cpu_models_sysemu.c +@@ -36,6 +36,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, + (max_model->def->gen == model->def->gen && + max_model->def->ec_ga < model->def->ec_ga)) { + list_add_feat("type", unavailable); ++ } else if (model->def->gen < 11 && kvm_enabled()) { ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ list_add_feat("type", unavailable); + } + + /* detect missing features if any to properly report them */ +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 6acf14d5ec..74f089d87f 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -2512,6 +2512,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) + error_setg(errp, "KVM doesn't support CPU models"); + 
return; + } ++ ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ if (model->def->gen < 11) { ++ error_setg(errp, "KVM: Unsupported CPU type specified: %s", ++ MACHINE(qdev_get_machine())->cpu_type); ++ return; ++ } ++ + prop.cpuid = s390_cpuid_from_cpu_model(model); + prop.ibc = s390_ibc_from_cpu_model(model); + /* configure cpu features indicated via STFL(e) */ +-- +2.31.1 + diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch new file mode 100644 index 0000000..ddae98d --- /dev/null +++ b/0005-Initial-redhat-build.patch @@ -0,0 +1,351 @@ +From 19ce5ff93ddd6b8a998348f2a5f59f603c5e11b7 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 12 Oct 2018 07:31:11 +0200 +Subject: Initial redhat build + +This patch introduces redhat build structure in redhat subdirectory. In addition, +several issues are fixed in QEMU tree: + + - Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm + - As we use qemu-kvm as name in all places, this is updated to be consistent + - Man page renamed from qemu to qemu-kvm + - man page is installed using make install so we have to fix it in qemu tree + +This rebase includes changes up to qemu-kvm-6.1.0-5.el9 + +Rebase notes (3.1.0): +- added new configure options + +Rebase notes (4.0.0): +- Added dependency to perl-Test-Harness (upstream) +- Added dependency to python3-sphinx (upstream) +- Change location of icons (upstream) +- Remove .desktop file (added upstream) +- Added qemu-trace-stap (added upstream) +- Removed elf2dmp (added upstream) +- Remove .buildinfo +- Added pvh.bin rom (added upstream) +- Added interop documentation files +- Use python module instead of qemu.py (upstream) + +Rebase notes (4.1.0): +- Remove edk2 files generated by build +- Switch to rhel-8.1-candidate build target +- Remove specs documentation +- Switched from libssh2 to libssh +- Add rc0 tarball usage hacks +- Added BuildRequires for wget, rpm-build and python3-sphinx +- Removed new 
unpacked files +- Update configure line to use new options + +Rebase notes (4.2.0): +- Disable iotest run during make check +- README renamed to README.rst (upstream) +- Removed ui-spice-app.so +- Added relevant changes from "505f7f4 redhat: Adding slirp to the exploded tree" +- Removed qemu-ga.8 install from spec file - installed by make +- Removed spapr-rtas.bin (upstream) +- Require newer SLOF (20191022) + +Rebase notes (5.1.0): +- Use python3 for virtio_seg_max_adjust.py test +- Removed qemu-trace-stap shebang from spec file +- Added virtiofsd.1 (upstream) +- Use out-of-tree build +- New documentation structure (upstream) +- Update local build +- Removing installed qemu-storage-daemon (added upstream) +- Removing opensbi-riscv32-sifive_u-fw_jump.bin (added upstream) +- Disable iotests (moved from Enable make check commit) +- Added missing configure options +- Reorder configure options +- qemu-pr-helper moved to /usr/libexec/ (upstream) +- Added submodules for usb-redir, smartcard-reader and qxl display (upstream) +- Added setting rc version in Makefile for build +- removed --disable-vxhs configure option (removed upstream) +- bumped required libusbx-devel version to 1.0.23 +- bumped libfdt version to 1.6.0 + +Rebase notes (5.2.0 rc0): +- Move libfdt dependency to qemu-kvm-core +- Move manpage rename from Makefile to spec file +- rename with-confsuffix configure option to with-suffix (upstream) +- Bump libusbx Requires version to 1.0.234 +- Manual copy of keymaps in spec file (BZ 1875217) +- Removed /usr/share/qemu-kvm/npcm7xx_bootrom.bin, considering it + unpackaged for now. +- Removed /usr/share/qemu-kvm/qboot.rom, considering unpackaged. +- Added build dependency for meson and ninja-build +- hw/s390/s390-pci-vfio.c hack - set NULL for g_autofree variables +- Removed Chanelog (upstream) +- Fix in directory used for docs (upstream add %name so we do not pass it in configure) +- Package various .so as part of qemu-kvm-core package. 
+ +Rebase notes (5.2.0 rc2): +- Added fix for dtrace build on RHEL 8.4.0 + +Rebase notes (5.2.0 rc3): +- Added man page for qemu-pr-helper +- Added new configure options +- Update qemu-kiwi patches to v4 + +Rebase notes (6.0.0): +- update tracetool usage in spec file +- remove qemu-storage-daemon-qmp-ref man page +- remove qemu-storage-daemon man page +- Added devel documentation +- do not package virtfs-proxy-helper files +- Use --with-git-submodules instead of --(enable|disable)-git-update +- Minor build fixes for sending upstream +- g_autofree initialization fixed upstream +- Updated rc information usage +- do not package hw-s390x-virtio-gpu-ccw.so +- Disable new switch options + +Rebase notes (6.1.0): +- Fix warning issue in block.c +- Download tarball from dist-git cache +- Removed sheepdog driver +- Added new display modules: + - hw-display-virtio-gpu-gl.so + - hw-display-virtio-gpu-pci-gl.so + - hw-display-virtio-vga-gl.so +- sasl fix moved from ui/vnc.c to ui/vnc-auth-sasl.c +- Added accel-qtest-%{kvm_target} and accel-tcg-%{kvm_target} +- Added about docs +- Use -q option for setup +- Added hw-usb-host.so +- Disable new options (bpf, nvmm, slirp-smbd) + +Rebase notes (6.2.0): +- Using internal meson +- removed --disable-jemalloc and --disable-tcmalloc configure options +- added audio-oss.so +- added fdt requirement for x86_64 +- tests/acceptance renamed to tests/avocado +- added multiboot_dma.bin +- Removed conflict relics +- Updated configure options + +Merged patches (3.1.0): +- 01f0c9f RHEL8: Add disable configure options to qemu spec file +- Spec file cleanups + +Merged patches (4.0.0): +- aa4297c Add edk2 Requires to qemu-kvm +- d124ff5779 Fixing brew build target +- eb204b5 Introduce the qemu-kvm-tests rpm +- 223cf0c Load kvm module during boot (partial) + +Merged patches (4.1.0): +- ebb6e97 redhat: Fix LOCALVERSION creation +- b0ab0cc redhat: enable tpmdev passthrough (not disabling tests) +- 7cb3c4a Enable libpmem to support nvdimm +- 
8943607 qemu-kvm.spec: bump libseccomp >= 2.4.0 +- 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) +- e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) + +Merged patches (4.2.0): +- 69e1fb2 enable virgla +- d4f6115 enable virgl, for real this time ... + +Merged patches (5.1.0): +- 5edf6bd Add support for rh-brew-module +- f77d52d redhat: ship virtiofsd vhost-user device backend +- 63f12d4 redhat: Always use module build target for rh-brew (modified) +- 9b1e140 redhat: updating the modular target +- 44b8bd0 spec: Fix python shenigans for tests + +Merged patches (5.2.0 rc0): +- 9238ce7 Add support for simpletrace +- 5797cff Remove explicit glusterfs-api dependency +- fd62478 disable virgl +- 0205018 redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ +- 3645097 redhat: Make all generated so files executable (not only block-*) + +Merged patches (5.2.0 rc2): +- pjw 99657 redhat: introduces disable_everything macro into the configure call +- pjw 99659 redhat: scripts/extract_build_cmd.py - Avoid listing empty lines +- pjw 99658 redhat: Fixing rh-local build +- pjw 99660 redhat: Add qemu-kiwi subpackage +- d2e59ce redhat: add (un/pre)install systemd hooks for qemu-ga + +Merged patches (5.2.0 rc3): +- pjw 99887 - redhat: allow Makefile rh-prep builddep to fail +- pjw 99885 - redhat: adding rh-rpm target + +Merged patches (6.0.0): +- 5ab9954a3b spec: find system python via meson +- cd0f7db11f build-system: use b_staticpic=false +- 80d2dec42c udev-kvm-check: remove the "exceeded subscription limit" message +- 38959d51c0 redhat: Allow make to inherit params from parent make for rh-local +- 1e0cfe458f redhat: moving all documentation files to qemu-kvm-docs +- d7a594d02b redhat: makes qemu respect system's crypto profile +- e2bbf1572b spec: Package qemu-storage-daemon +- 92f10993ba spec: ui-spice sub-package +- 8931e46069 spec: ui-opengl sub-package + +Merged patches (6.1.0): +- 7bb57541b3 redhat: Install the s390-netboot.img that we've built +- 
b4a8531f41 redhat: Fix "unversioned Obsoletes" warning +- 141a1693c7 redhat: Move qemu-kvm-docs dependency to qemu-kvm +- d75f59c6f9 redhat: introducting qemu-kvm-hw-usbredir +- a934d8bf44 redhat: use the standard vhost-user JSON path + +Merged patches (6.2.0): +- 4f3f04bbb6 spec: Remove qemu-kiwi build +--- + .gitignore | 1 + + .gitlab-ci.yml | 24 - + .gitlab/issue_templates/bug.md | 64 - + .gitlab/issue_templates/feature_request.md | 32 - + README.systemtap | 43 + + meson.build | 4 +- + redhat/Makefile | 90 + + redhat/Makefile.common | 48 + + redhat/README.tests | 39 + + redhat/qemu-kvm.spec.template | 3896 ++++++++++++++++++++ + redhat/scripts/extract_build_cmd.py | 5 +- + redhat/scripts/process-patches.sh | 20 +- + redhat/scripts/tarball_checksum.sh | 2 +- + redhat/udev-kvm-check.c | 19 +- + scripts/qemu-guest-agent/fsfreeze-hook | 2 +- + scripts/systemtap/conf.d/qemu_kvm.conf | 4 + + scripts/systemtap/script.d/qemu_kvm.stp | 1 + + tests/check-block.sh | 2 + + ui/vnc-auth-sasl.c | 2 +- + 19 files changed, 4142 insertions(+), 156 deletions(-) + delete mode 100644 .gitlab-ci.yml + delete mode 100644 .gitlab/issue_templates/bug.md + delete mode 100644 .gitlab/issue_templates/feature_request.md + create mode 100644 README.systemtap + create mode 100644 redhat/Makefile + create mode 100644 redhat/Makefile.common + create mode 100644 redhat/README.tests + create mode 100644 redhat/qemu-kvm.spec.template + create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf + create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp + +diff --git a/README.systemtap b/README.systemtap +new file mode 100644 +index 0000000000..ad913fc990 +--- /dev/null ++++ b/README.systemtap +@@ -0,0 +1,43 @@ ++QEMU tracing using systemtap-initscript ++--------------------------------------- ++ ++You can capture QEMU trace data all the time using systemtap-initscript. This ++uses SystemTap's flight recorder mode to trace all running guests to a ++fixed-size buffer on the host. 
Old trace entries are overwritten by new ++entries when the buffer size wraps. ++ ++1. Install the systemtap-initscript package: ++ # yum install systemtap-initscript ++ ++2. Install the systemtap scripts and the conf file: ++ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/ ++ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/ ++ ++The set of trace events to enable is given in qemu_kvm.stp. This SystemTap ++script can be customized to add or remove trace events provided in ++/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp. ++ ++SystemTap customizations can be made to qemu_kvm.conf to control the flight ++recorder buffer size and whether to store traces in memory only or disk too. ++See stap(1) for option documentation. ++ ++3. Start the systemtap service. ++ # service systemtap start qemu_kvm ++ ++4. Make the service start at boot time. ++ # chkconfig systemtap on ++ ++5. Confirm that the service works. ++ # service systemtap status qemu_kvm ++ qemu_kvm is running... ++ ++When you want to inspect the trace buffer, perform the following steps: ++ ++1. Dump the trace buffer. ++ # staprun -A qemu_kvm >/tmp/trace.log ++ ++2. Start the systemtap service because the preceding step stops the service. ++ # service systemtap start qemu_kvm ++ ++3. Translate the trace record to readable format. ++ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log +diff --git a/meson.build b/meson.build +index 96de1a6ef9..5f6ba86dbb 100644 +--- a/meson.build ++++ b/meson.build +@@ -2108,7 +2108,9 @@ if capstone_opt == 'internal' + # Include all configuration defines via a header file, which will wind up + # as a dependency on the object file, and thus changes here will result + # in a rebuild. 
+- '-include', 'capstone-defs.h' ++ '-include', 'capstone-defs.h', ++ ++ '-Wp,-D_GLIBCXX_ASSERTIONS', + ] + + libcapstone = static_library('capstone', +diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook +index 13aafd4845..e9b84ec028 100755 +--- a/scripts/qemu-guest-agent/fsfreeze-hook ++++ b/scripts/qemu-guest-agent/fsfreeze-hook +@@ -8,7 +8,7 @@ + # request, it is issued with "thaw" argument after filesystem is thawed. + + LOGFILE=/var/log/qga-fsfreeze-hook.log +-FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d ++FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d + + # Check whether file $1 is a backup or rpm-generated file and should be ignored + is_ignored_file() { +diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf +new file mode 100644 +index 0000000000..372d8160a4 +--- /dev/null ++++ b/scripts/systemtap/conf.d/qemu_kvm.conf +@@ -0,0 +1,4 @@ ++# Force load uprobes (see BZ#1118352) ++stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true ++ ++qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes +diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp +new file mode 100644 +index 0000000000..c04abf9449 +--- /dev/null ++++ b/scripts/systemtap/script.d/qemu_kvm.stp +@@ -0,0 +1 @@ ++probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} +diff --git a/tests/check-block.sh b/tests/check-block.sh +index f86cb863de..6d38340d49 100755 +--- a/tests/check-block.sh ++++ b/tests/check-block.sh +@@ -69,6 +69,8 @@ else + fi + fi + ++exit 0 ++ + cd tests/qemu-iotests + + # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests +diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c +index 47fdae5b21..2a950caa2a 100644 +--- a/ui/vnc-auth-sasl.c ++++ b/ui/vnc-auth-sasl.c +@@ -42,7 +42,7 @@ + + bool vnc_sasl_server_init(Error **errp) + { +- 
int saslErr = sasl_server_init(NULL, "qemu"); ++ int saslErr = sasl_server_init(NULL, "qemu-kvm"); + + if (saslErr != SASL_OK) { + error_setg(errp, "Failed to initialize SASL auth: %s", +-- +2.27.0 + diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch new file mode 100644 index 0000000..a3fa5d1 --- /dev/null +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -0,0 +1,795 @@ +From 3d5a82d172345d17e300672909835262ff9dc917 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 2 Sep 2020 09:11:07 +0200 +Subject: Enable/disable devices for RHEL + +This commit adds all changes related to changes in supported devices. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (qemu 3.1.0) +- spapr_rng disabled in default_config +- new hyperv.mak in default configs +- Move changes from x86_64-softmmu.mak to i386-softmmu.mak +- Added CONFIG_VIRTIO_MMIO to aarch64-softmmu.mak +- Removed config_vga_isa.c changes as no longer needed +- Removed new devices + +Rebase notes (4.0.0): +- Added CONFIG_PCI_EXPRESS_GENERIC_BRIDGE for aarch64-softmmu.mak +- Added CONFIG_ARM_VIRT for aarch64-softmmu.mak +- Switch to KConfig (upstream) + - Using device whitelist + without-default-devices option + +Rebase notes (4.1.0): +- Added CONFIG_USB_OHCI_PCI for ppc64 +- Added CONFIG_XIVE_KVM for ppc64 +- Added CONFIG_ACPI_PCI for x86_64 +- Added CONFIG_SEMIHOSTING for aarch64 +- Cleanup aarch64 devices +- Do not build a15mpcore.c +- Removed ide-isa.c stub file +- Use CONFIG_USB_EHCI_PCI on x86_64 (new upstream) + +Rebase notes (4.2.0-rc0): +- Use conditional build for isa-superio.c (upstream change) +- Rename PCI_PIIX to PCI_I440FX (upstream change) + +Rebase notes (4.2.0-rc3): +- Disabled ccid-card-emulated (patch 92566) +- Disabled vfio-pci-igd-lpc-bridge (patch 92565) + +Rebase notes (5.1.0): +- added CONFIG_PCI_EXPRESS on ppc64 (due to upstream dependency) +- Added CONFIG_NVDIMM +- updated cortex-15 disabling to upstream code +- Add 
CONFIG_ACPI_APEI for aarch64 +- removed obsolete hw/bt/Makefile.objs chunk +- removed unnecessary changes in target/i386/cpu.c + +Rebase notes (5.2.0 rc0): +- Added CONFIG_USB_XHCI_PCI on aarch64 ppc64 and x86_64 +- remove vl.c hack for no hpet +- Enable CONFIG_PTIMER for aarch64 +- Do not package hw-display-virtio-gpu.so on s390x + +Rebase notes (5.2.0 rc1): +- Added CONFIG_ARM_GIC for aarch64 (required for build) + +Rebase notes (weekly-210113): +- Removed XICS_KVM, XICS_SPAPR, XIVE_KVM and XIVE_SPAPR config (removed upstream) + +Rebase notes (weekly-210120): +- Add CONFIG_ARM_COMPATIBLE_SEMIHOSTING option + +Rebase notes (weekly-210203): +- Rename CONFIG_PVPANIC to CONFIG_PVPANIC_ISA + +Rebase notes (weekly-210317): +- Add new USB_STORAGE_CORE and USB_STORAGE_CLASSIC config for ppc64 and x86_64 +- Update disabling TCG cpus for AArch64 + +Rebase notes (weekly-210519): +- Do not use CONFIG_SPICE and CONFIG_OPENGL in default configs + +Rebase notes (weekly-210623): +- Add CONFIG_TPM for archs with used TPM functionality + +Rebase notes (weekly-210714): +- default_configs moved to configs + +Rebase notes (6.1.0 rc2): +- Use --with-device-ARCH configure option to use redhat config files + +Rebase notes (6.2.0 rc3): +- Do not remove -no-hpet documentation +Merged patches (qemu 3.1.0): +- d51e082 Re-enable CONFIG_HYPERV_TESTDEV +- 4b889f3 Declare cirrus-vga as deprecated +- b579d32 Do not build bluetooth support +- 3eef52a Disable CONFIG_IPMI and CONFIG_I2C for ppc64 +- 9caf292 Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 + +Merged patches (4.1.0): +- 20a51f6 fdc: Revert downstream disablement of device "floppy" +- f869cc0 fdc: Restrict floppy controllers to RHEL-7 machine types +- 5909721 aarch64: Compile out IOH3420 +- 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) +- 495a27d x86_64-rh-devices: add missing TPM passthrough +- e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) + +Merged patches (4.2.0): +- f7587dd RHEL: disable hostmem-memfd + +Merged 
patches (5.1.0): +- 4543a3c i386: Remove cpu64-rhel6 CPU model +- 96533 aarch64: Remove tcg cpu types (pjw commit) +- 559d589 Revert "RHEL: disable hostmem-memfd" +- 441128e enable ramfb + +Merged patches (5.2.0 rc0): +- f70eb50 RHEL-only: Enable vTPM for POWER in downstream configs +- 69d8ae7 redhat: fix 5.0 rebase missing ISA TPM TIS +- 8310f89 RHEL-only: Enable vTPM for ARM in downstream configs +- 4a8ccfd Disable TPM passthrough backend on ARM + +Merged patches (6.0.0): +- ff817df9e3 config: enable VFIO_CCW +- 70d3924521 redhat: Add some devices for exporting upstream machine types + - without machine type chunks +- efac91b2b4 default-configs: Enable vhost-user-blk + +Merged patches (weekly-210630): +- 59a178acff disable CONFIG_USB_STORAGE_BOT + +Merged patches (6.1.0 rc2): +- 86f0025f16 aarch64: Add USB storage devices +--- + .../aarch64-softmmu/aarch64-rh-devices.mak | 31 ++++++ + .../ppc64-softmmu/ppc64-rh-devices.mak | 36 ++++++ + configs/devices/rh-virtio.mak | 10 ++ + .../s390x-softmmu/s390x-rh-devices.mak | 16 +++ + .../x86_64-softmmu/x86_64-rh-devices.mak | 104 ++++++++++++++++++ + .../x86_64-upstream-devices.mak | 4 + + hw/acpi/ich9.c | 4 +- + hw/arm/meson.build | 2 +- + hw/block/fdc.c | 10 ++ + hw/char/parallel.c | 9 ++ + hw/cpu/meson.build | 5 +- + hw/display/cirrus_vga.c | 3 + + hw/ide/piix.c | 5 +- + hw/input/pckbd.c | 2 + + hw/net/e1000.c | 2 + + hw/ppc/spapr_cpu_core.c | 2 + + hw/timer/hpet.c | 8 ++ + hw/usb/meson.build | 2 +- + redhat/qemu-kvm.spec.template | 9 +- + target/arm/cpu_tcg.c | 10 ++ + target/ppc/cpu-models.c | 10 ++ + target/s390x/cpu_models_sysemu.c | 3 + + target/s390x/kvm/kvm.c | 8 ++ + 23 files changed, 286 insertions(+), 9 deletions(-) + create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak + create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak + create mode 100644 configs/devices/rh-virtio.mak + create mode 100644 configs/devices/s390x-softmmu/s390x-rh-devices.mak + create mode 100644 
configs/devices/x86_64-softmmu/x86_64-rh-devices.mak + create mode 100644 configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak + +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +new file mode 100644 +index 0000000000..0d4f9e6e4b +--- /dev/null ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -0,0 +1,31 @@ ++include ../rh-virtio.mak ++ ++CONFIG_ARM_GIC_KVM=y ++CONFIG_ARM_GIC=y ++CONFIG_ARM_SMMUV3=y ++CONFIG_ARM_V7M=y ++CONFIG_ARM_VIRT=y ++CONFIG_EDID=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_SCSI=y ++CONFIG_SEMIHOSTING=y ++CONFIG_USB=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VIRTIO_MMIO=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_XIO3130=y ++CONFIG_NVDIMM=y ++CONFIG_ACPI_APEI=y ++CONFIG_TPM=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_TIS_SYSBUS=y ++CONFIG_PTIMER=y ++CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y +diff --git a/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak +new file mode 100644 +index 0000000000..73e3ee0293 +--- /dev/null ++++ b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak +@@ -0,0 +1,36 @@ ++include ../rh-virtio.mak ++ ++CONFIG_DIMM=y ++CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y ++CONFIG_PCI=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PCI_EXPRESS=y ++CONFIG_PSERIES=y ++CONFIG_SCSI=y ++CONFIG_SPAPR_VSCSI=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_OHCI=y ++CONFIG_USB_OHCI_PCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_VGA=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_XICS=y ++CONFIG_XIVE=y ++CONFIG_TPM=y 
++CONFIG_TPM_SPAPR=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_PASSTHROUGH=y +diff --git a/configs/devices/rh-virtio.mak b/configs/devices/rh-virtio.mak +new file mode 100644 +index 0000000000..94ede1b5f6 +--- /dev/null ++++ b/configs/devices/rh-virtio.mak +@@ -0,0 +1,10 @@ ++CONFIG_VIRTIO=y ++CONFIG_VIRTIO_BALLOON=y ++CONFIG_VIRTIO_BLK=y ++CONFIG_VIRTIO_GPU=y ++CONFIG_VIRTIO_INPUT=y ++CONFIG_VIRTIO_INPUT_HOST=y ++CONFIG_VIRTIO_NET=y ++CONFIG_VIRTIO_RNG=y ++CONFIG_VIRTIO_SCSI=y ++CONFIG_VIRTIO_SERIAL=y +diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +new file mode 100644 +index 0000000000..165c082e87 +--- /dev/null ++++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +@@ -0,0 +1,16 @@ ++include ../rh-virtio.mak ++ ++CONFIG_PCI=y ++CONFIG_S390_CCW_VIRTIO=y ++CONFIG_S390_FLIC=y ++CONFIG_S390_FLIC_KVM=y ++CONFIG_SCLPCONSOLE=y ++CONFIG_SCSI=y ++CONFIG_TERMINAL3270=y ++CONFIG_VFIO=y ++CONFIG_VFIO_AP=y ++CONFIG_VFIO_CCW=y ++CONFIG_VFIO_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_CCW=y ++CONFIG_WDT_DIAG288=y +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +new file mode 100644 +index 0000000000..ddf036f042 +--- /dev/null ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -0,0 +1,104 @@ ++include ../rh-virtio.mak ++include x86_64-upstream-devices.mak ++ ++CONFIG_AC97=y ++CONFIG_ACPI=y ++CONFIG_ACPI_PCI=y ++CONFIG_ACPI_CPU_HOTPLUG=y ++CONFIG_ACPI_MEMORY_HOTPLUG=y ++CONFIG_ACPI_NVDIMM=y ++CONFIG_ACPI_SMBUS=y ++CONFIG_ACPI_VMGENID=y ++CONFIG_ACPI_X86=y ++CONFIG_ACPI_X86_ICH=y ++CONFIG_AHCI=y ++CONFIG_APIC=y ++CONFIG_APM=y ++CONFIG_BOCHS_DISPLAY=y ++CONFIG_DIMM=y ++CONFIG_E1000E_PCI_EXPRESS=y ++CONFIG_E1000_PCI=y ++CONFIG_EDU=y ++CONFIG_FDC=y ++CONFIG_FDC_SYSBUS=y ++CONFIG_FW_CFG_DMA=y ++CONFIG_HDA=y ++CONFIG_HYPERV=y ++CONFIG_HYPERV_TESTDEV=y ++CONFIG_I2C=y ++CONFIG_I440FX=y ++CONFIG_I8254=y ++CONFIG_I8257=y 
++CONFIG_I8259=y ++CONFIG_I82801B11=y ++CONFIG_IDE_CORE=y ++CONFIG_IDE_PCI=y ++CONFIG_IDE_PIIX=y ++CONFIG_IDE_QDEV=y ++CONFIG_IOAPIC=y ++CONFIG_IOH3420=y ++CONFIG_ISA_BUS=y ++CONFIG_ISA_DEBUG=y ++CONFIG_ISA_TESTDEV=y ++CONFIG_LPC_ICH9=y ++CONFIG_MC146818RTC=y ++CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y ++CONFIG_PAM=y ++CONFIG_PC=y ++CONFIG_PCI=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_EXPRESS=y ++CONFIG_PCI_EXPRESS_Q35=y ++CONFIG_PCI_I440FX=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PCKBD=y ++CONFIG_PCSPK=y ++CONFIG_PC_ACPI=y ++CONFIG_PC_PCI=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_PVPANIC_ISA=y ++CONFIG_PXB=y ++CONFIG_Q35=y ++CONFIG_QXL=y ++CONFIG_RTL8139_PCI=y ++CONFIG_SCSI=y ++CONFIG_SERIAL=y ++CONFIG_SERIAL_ISA=y ++CONFIG_SERIAL_PCI=y ++CONFIG_SEV=y ++CONFIG_SGA=y ++CONFIG_SMBIOS=y ++CONFIG_SMBUS_EEPROM=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_EHCI=y ++CONFIG_USB_EHCI_PCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_USB_UHCI=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_CIRRUS=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VHOST_USER_BLK=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_VGA=y ++CONFIG_VMMOUSE=y ++CONFIG_VMPORT=y ++CONFIG_VTD=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_WDT_IB700=y ++CONFIG_XIO3130=y ++CONFIG_TPM=y ++CONFIG_TPM_CRB=y ++CONFIG_TPM_TIS_ISA=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_PASSTHROUGH=y +diff --git a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak +new file mode 100644 +index 0000000000..2cd20f54d2 +--- /dev/null ++++ b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak +@@ -0,0 +1,4 @@ ++# We need "isa-parallel" ++CONFIG_PARALLEL=y ++# We need "hpet" ++CONFIG_HPET=y +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index ebe08ed831..381ef2ddcf 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -438,8 +438,8 @@ 
void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; + pm->acpi_memory_hotplug.is_enabled = true; + pm->cpu_hotplug_legacy = true; +- pm->disable_s3 = 0; +- pm->disable_s4 = 0; ++ pm->disable_s3 = 1; ++ pm->disable_s4 = 1; + pm->s4_val = 2; + pm->use_acpi_hotplug_bridge = true; + pm->keep_pci_slot_hpc = true; +diff --git a/hw/arm/meson.build b/hw/arm/meson.build +index 721a8eb8be..87ed4dd914 100644 +--- a/hw/arm/meson.build ++++ b/hw/arm/meson.build +@@ -31,7 +31,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) + arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) + arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) + +-arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) ++#arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) + arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c')) + arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) + arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 21d18ac2e3..97fa6de423 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -48,6 +48,8 @@ + #include "qom/object.h" + #include "fdc-internal.h" + ++#include "hw/boards.h" ++ + /********************************************************/ + /* debug Floppy devices */ + +@@ -2337,6 +2339,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) + FDrive *drive; + static int command_tables_inited = 0; + ++ /* Restricted for Red Hat Enterprise Linux: */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (!strstr(mc->name, "-rhel7.")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { + error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); + return; 
+diff --git a/hw/char/parallel.c b/hw/char/parallel.c +index b45e67bfbb..e5f108211b 100644 +--- a/hw/char/parallel.c ++++ b/hw/char/parallel.c +@@ -29,6 +29,7 @@ + #include "chardev/char-parallel.h" + #include "chardev/char-fe.h" + #include "hw/acpi/aml-build.h" ++#include "hw/boards.h" + #include "hw/irq.h" + #include "hw/isa/isa.h" + #include "hw/qdev-properties.h" +@@ -534,6 +535,14 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) + int base; + uint8_t dummy; + ++ /* Restricted for Red Hat Enterprise Linux */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (strstr(mc->name, "rhel")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (!qemu_chr_fe_backend_connected(&s->chr)) { + error_setg(errp, "Can't create parallel device, empty char device"); + return; +diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build +index 9e52fee9e7..bb71c9f3e7 100644 +--- a/hw/cpu/meson.build ++++ b/hw/cpu/meson.build +@@ -1,6 +1,7 @@ +-softmmu_ss.add(files('core.c', 'cluster.c')) ++#softmmu_ss.add(files('core.c', 'cluster.c')) ++softmmu_ss.add(files('core.c')) + + specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) + specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) + specific_ss.add(when: 'CONFIG_A9MPCORE', if_true: files('a9mpcore.c')) +-specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) ++#specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) +diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c +index fdca6ca659..fa1a7eee51 100644 +--- a/hw/display/cirrus_vga.c ++++ b/hw/display/cirrus_vga.c +@@ -2945,6 +2945,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + int16_t device_id = pc->device_id; + ++ warn_report("'cirrus-vga' is deprecated, " ++ "please use a different VGA card 
instead"); ++ + /* follow real hardware, cirrus card emulated has 4 MB video memory. + Also accept 8 MB/16 MB for backward compatibility. */ + if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && +diff --git a/hw/ide/piix.c b/hw/ide/piix.c +index ce89fd0aa3..fbcf802b13 100644 +--- a/hw/ide/piix.c ++++ b/hw/ide/piix.c +@@ -232,7 +232,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) + k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +- dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix3_ide_info = { +@@ -261,6 +262,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix4_ide_info = { +diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c +index baba62f357..bc360347ea 100644 +--- a/hw/input/pckbd.c ++++ b/hw/input/pckbd.c +@@ -796,6 +796,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) + dc->vmsd = &vmstate_kbd_isa; + isa->build_aml = i8042_build_aml; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo i8042_info = { +diff --git a/hw/net/e1000.c b/hw/net/e1000.c +index f5bc81296d..282d01e374 100644 +--- a/hw/net/e1000.c ++++ b/hw/net/e1000.c +@@ -1821,6 +1821,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux 7 */ + { + .name = "e1000-82544gc", + .device_id = E1000_DEV_ID_82544GC_COPPER, +@@ -1833,6 +1834,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = 
E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#endif + }; + + static void e1000_register_types(void) +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index 58e7341cb7..8ba34f6a1d 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -370,10 +370,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { + .instance_size = sizeof(SpaprCpuCore), + .class_size = sizeof(SpaprCpuCoreClass), + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"), ++#endif + DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), +diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c +index 9520471be2..202e032524 100644 +--- a/hw/timer/hpet.c ++++ b/hw/timer/hpet.c +@@ -733,6 +733,14 @@ static void hpet_realize(DeviceState *dev, Error **errp) + int i; + HPETTimer *timer; + ++ /* Restricted for Red Hat Enterprise Linux */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (strstr(mc->name, "rhel")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (!s->intcap) { + warn_report("Hpet's intcap not initialized"); + } +diff --git a/hw/usb/meson.build b/hw/usb/meson.build +index de853d780d..0776ae6a20 100644 +--- a/hw/usb/meson.build ++++ b/hw/usb/meson.build +@@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade + if cacard.found() + usbsmartcard_ss = ss.source_set() + usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', +- if_true: [cacard, files('ccid-card-emulated.c', 'ccid-card-passthru.c')]) ++ if_true: [cacard, files('ccid-card-passthru.c')]) + hw_usb_modules += {'smartcard': usbsmartcard_ss} + endif + +diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c +index 
13d0e9b195..3826fa5122 100644 +--- a/target/arm/cpu_tcg.c ++++ b/target/arm/cpu_tcg.c +@@ -22,6 +22,7 @@ + /* CPU models. These are not needed for the AArch64 linux-user build. */ + #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) + static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + { +@@ -375,6 +376,7 @@ static void cortex_a9_initfn(Object *obj) + cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */ + define_arm_cp_regs(cpu, cortexa9_cp_reginfo); + } ++#endif /* disabled for RHEL */ + + #ifndef CONFIG_USER_ONLY + static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) +@@ -400,6 +402,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { + REGINFO_SENTINEL + }; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_a7_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -445,6 +448,7 @@ static void cortex_a7_initfn(Object *obj) + cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ + } ++#endif /* disabled for RHEL */ + + static void cortex_a15_initfn(Object *obj) + { +@@ -488,6 +492,7 @@ static void cortex_a15_initfn(Object *obj) + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_m0_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -928,6 +933,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) + + cc->gdb_core_xml_file = "arm-m-profile.xml"; + } ++#endif /* disabled for RHEL */ + + #ifndef TARGET_AARCH64 + /* +@@ -1007,6 +1013,7 @@ static void arm_max_initfn(Object *obj) + #endif /* !TARGET_AARCH64 */ + + static const ARMCPUInfo arm_tcg_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "arm926", .initfn = arm926_initfn }, + { .name = "arm946", .initfn = arm946_initfn }, + { .name = 
"arm1026", .initfn = arm1026_initfn }, +@@ -1022,7 +1029,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "cortex-a7", .initfn = cortex_a7_initfn }, + { .name = "cortex-a8", .initfn = cortex_a8_initfn }, + { .name = "cortex-a9", .initfn = cortex_a9_initfn }, ++#endif /* disabled for RHEL */ + { .name = "cortex-a15", .initfn = cortex_a15_initfn }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-m0", .initfn = cortex_m0_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m3", .initfn = cortex_m3_initfn, +@@ -1053,6 +1062,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, ++#endif /* disabled for RHEL */ + #ifndef TARGET_AARCH64 + { .name = "max", .initfn = arm_max_initfn }, + #endif +diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c +index 4baa111713..d779c4d1d5 100644 +--- a/target/ppc/cpu-models.c ++++ b/target/ppc/cpu-models.c +@@ -66,6 +66,7 @@ + #define POWERPC_DEF(_name, _pvr, _type, _desc) \ + POWERPC_DEF_SVR(_name, _desc, _pvr, POWERPC_SVR_NONE, _type) + ++#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ + /* Embedded PowerPC */ + /* PowerPC 401 family */ + POWERPC_DEF("401", CPU_POWERPC_401, 401, +@@ -740,8 +741,10 @@ + "PowerPC 7447A v1.2 (G4)") + POWERPC_DEF("7457a_v1.2", CPU_POWERPC_74x7A_v12, 7455, + "PowerPC 7457A v1.2 (G4)") ++#endif + /* 64 bits PowerPC */ + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970, + "PowerPC 970 v2.2") + POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, +@@ -760,6 +763,7 @@ + "PowerPC 970MP v1.1") + POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, + "POWER5+ v2.1") ++#endif + POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, + "POWER7 v2.3") + POWERPC_DEF("power7+_v2.1", 
CPU_POWERPC_POWER7P_v21, POWER7, +@@ -784,6 +788,7 @@ + /* PowerPC CPU aliases */ + + PowerPCCPUAlias ppc_cpu_aliases[] = { ++#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ + { "403", "403gc" }, + { "405", "405d4" }, + { "405cr", "405crc" }, +@@ -942,12 +947,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "7447a", "7447a_v1.2" }, + { "7457a", "7457a_v1.2" }, + { "apollo7pm", "7457a_v1.0" }, ++#endif + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "970", "970_v2.2" }, + { "970fx", "970fx_v3.1" }, + { "970mp", "970mp_v1.1" }, + { "power5+", "power5+_v2.1" }, + { "power5gs", "power5+_v2.1" }, ++#endif + { "power7", "power7_v2.3" }, + { "power7+", "power7+_v2.1" }, + { "power8e", "power8e_v2.1" }, +@@ -957,6 +965,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "power10", "power10_v2.0" }, + #endif + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* Generic PowerPCs */ + #if defined(TARGET_PPC64) + { "ppc64", "970fx_v3.1" }, +@@ -964,5 +973,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "ppc32", "604" }, + { "ppc", "604" }, + { "default", "604" }, ++#endif + { NULL, NULL } + }; +diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c +index 05c3ccaaff..6a04ccab1b 100644 +--- a/target/s390x/cpu_models_sysemu.c ++++ b/target/s390x/cpu_models_sysemu.c +@@ -36,6 +36,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, + (max_model->def->gen == model->def->gen && + max_model->def->ec_ga < model->def->ec_ga)) { + list_add_feat("type", unavailable); ++ } else if (model->def->gen < 11 && kvm_enabled()) { ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ list_add_feat("type", unavailable); + } + + /* detect missing features if any to properly report them */ +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 5b1fdb55c4..c52434985b 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -2508,6 +2508,14 @@ 
void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) + error_setg(errp, "KVM doesn't support CPU models"); + return; + } ++ ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ if (model->def->gen < 11) { ++ error_setg(errp, "KVM: Unsupported CPU type specified: %s", ++ MACHINE(qdev_get_machine())->cpu_type); ++ return; ++ } ++ + prop.cpuid = s390_cpuid_from_cpu_model(model); + prop.ibc = s390_ibc_from_cpu_model(model); + /* configure cpu features indicated via STFL(e) */ +-- +2.27.0 + diff --git a/0006-Machine-type-related-general-changes.patch b/0006-Machine-type-related-general-changes.patch new file mode 100644 index 0000000..c3b08a4 --- /dev/null +++ b/0006-Machine-type-related-general-changes.patch @@ -0,0 +1,619 @@ +From a525db3951dc68c469d1f51bdc69ab6e75e72c37 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 11 Jan 2019 09:54:45 +0100 +Subject: Machine type related general changes + +This patch is first part of original "Add RHEL machine types" patch we +split to allow easier review. It contains changes not related to any +architecture. 
+ +Signed-off-by: Miroslav Rezanina +-- +Rebase notes (6.2.0): +- Do not duplicate minimal_version_id for piix4_pm +- Remove empty line chunks in serial.c +- Remove migration.h include in serial.c +- Update hw_compat_rhel_8_5 (from MR 66) + +Rebase notes (7.0.0): +- Remove downstream changes leftovers in hw/rtc/mc146818rtc.c +- Remove unnecessary change in hw/usb/hcd-uhci.c + +Merged patches (6.1.0): +- f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4 +- 1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 +- a3995e2eff Remove RHEL 7.0.0 machine type (only generic changes) +- ad3190a79b Remove RHEL 7.1.0 machine type (only generic changes) +- 84bbe15d4e Remove RHEL 7.2.0 machine type (only generic changes) +- 0215eb3356 Remove RHEL 7.3.0 machine types (only generic changes) +- af69d1ca6e Remove RHEL 7.4.0 machine types (only generic changes) +- 8f7a74ab78 Remove RHEL 7.5.0 machine types (only generic changes) + +Merged patches (6.2.0): +- d687ac13d2 redhat: Define hw_compat_rhel_8_5 + +Merged patches (7.0.0): +- ef5afcc86d Fix virtio-net-pci* "vectors" compat +- 168f0d56e3 compat: Update hw_compat_rhel_8_5 with 6.2.0 RC2 changes +--- + hw/acpi/piix4.c | 6 +- + hw/arm/virt.c | 2 +- + hw/core/machine.c | 186 +++++++++++++++++++++++++++++++++++ + hw/display/vga-isa.c | 2 +- + hw/i386/pc_piix.c | 2 + + hw/i386/pc_q35.c | 2 + + hw/net/rtl8139.c | 4 +- + hw/smbios/smbios.c | 46 ++++++++- + hw/timer/i8254_common.c | 2 +- + hw/usb/hcd-xhci-pci.c | 59 ++++++++--- + hw/usb/hcd-xhci-pci.h | 1 + + include/hw/boards.h | 21 ++++ + include/hw/firmware/smbios.h | 5 +- + include/hw/i386/pc.h | 3 + + 14 files changed, 316 insertions(+), 25 deletions(-) + +diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c +index fe5625d07a..28544e78c3 100644 +--- a/hw/acpi/piix4.c ++++ b/hw/acpi/piix4.c +@@ -287,7 +287,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) + static const VMStateDescription vmstate_acpi = { + .name = 
"piix4_pm", + .version_id = 3, +- .minimum_version_id = 3, ++ .minimum_version_id = 2, + .post_load = vmstate_acpi_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), +@@ -653,8 +653,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + + static Property piix4_pm_properties[] = { + DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), + DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, + use_acpi_hotplug_bridge, true), +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index d2e5ecd234..6a84031fd7 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1596,7 +1596,7 @@ static void virt_build_smbios(VirtMachineState *vms) + + smbios_set_defaults("QEMU", product, + vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, +- true, SMBIOS_ENTRY_POINT_TYPE_64); ++ true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64); + + smbios_get_tables(MACHINE(vms), NULL, 0, + &smbios_tables, &smbios_tables_len, +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 1e23fdc14b..ea430d844e 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,192 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" + ++/* ++ * Mostly the same as hw_compat_6_0 and hw_compat_6_1 ++ */ ++GlobalProperty hw_compat_rhel_8_5[] = { ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "gpex-pcihost", "allow-unmapped-accesses", "false" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "i8042", "extended-state", "false"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "nvme-ns", "eui64-default", "off"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000e", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "vhost-user-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "nvme-ns", "shared", "off" }, ++}; ++const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); ++ ++/* ++ * Mostly the same as hw_compat_5_2 ++ */ ++GlobalProperty hw_compat_rhel_8_4[] = { ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "ICH9-LPC", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "PIIX4_PM", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "virtio-blk-device", "report-discard-granularity", "off" }, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ /* ++ * Upstream incorrectly had "virtio-net-pci" instead of "virtio-net-pci-base", ++ * (https://bugzilla.redhat.com/show_bug.cgi?id=1999141) ++ */ ++ { "virtio-net-pci-base", "vectors", "3"}, ++}; ++const size_t 
hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); ++ ++/* ++ * Mostly the same as hw_compat_5_1 ++ */ ++GlobalProperty hw_compat_rhel_8_3[] = { ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-blk", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-blk-device", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-scsi-device", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "nvme", "use-intel-id", "on"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pl011", "migrate-clk", "off" }, ++ /* hw_compat_rhel_8_3 bz 1912846 */ ++ { "pci-xhci", "x-rh-late-msi-cap", "off" }, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-pci", "x-ats-page-aligned", "off"}, ++}; ++const size_t hw_compat_rhel_8_3_len = G_N_ELEMENTS(hw_compat_rhel_8_3); ++ ++/* ++ * The same as hw_compat_4_2 + hw_compat_5_0 ++ */ ++GlobalProperty hw_compat_rhel_8_2[] = { ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "queue-size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "virtqueue_size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "x-enable-wce-if-config-wce", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "seg-max-adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "vhost-blk-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "usb-host", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { 
"usb-redir", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl-vga", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "fw_cfg", "acpi-mr-restore", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-device", "use-disabled-flag", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "pci-host-bridge", "x-config-reg-migration-enabled", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "virtio-balloon-device", "page-poison", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-read-set-eax", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-signal-unsupported-cmd", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-report-vmx-type", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-cmds-v2", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "virtio-device", "x-disable-legacy-check", "true" }, ++}; ++const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); ++ ++/* ++ * The same as hw_compat_4_1 ++ */ ++GlobalProperty hw_compat_rhel_8_1[] = { ++ /* hw_compat_rhel_8_1 from hw_compat_4_1 */ ++ { "virtio-pci", "x-pcie-flr-init", "off" }, ++}; ++const size_t hw_compat_rhel_8_1_len = G_N_ELEMENTS(hw_compat_rhel_8_1); ++ ++/* The same as hw_compat_3_1 ++ * format of array has been changed by: ++ * 6c36bddf5340 ("machine: Use shorter format for GlobalProperty arrays") ++ */ ++GlobalProperty hw_compat_rhel_8_0[] = { ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-speed", "2_5" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-width", "1" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { 
"memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-crb", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-tis", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-kbd", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-mouse", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-tablet", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "discard", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "write-zeroes", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "VGA", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "secondary-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "bochs-display", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-gpu-device", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-device", "use-started", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ ++ { "pcie-root-port-base", "disable-acs", "true" }, ++}; ++const size_t hw_compat_rhel_8_0_len = G_N_ELEMENTS(hw_compat_rhel_8_0); ++ ++/* The same as hw_compat_3_0 + hw_compat_2_12 ++ * except that ++ * there's nothing in 3_0 ++ * migration.decompress-error-check=off was in 7.5 from bz 1584139 ++ */ ++GlobalProperty hw_compat_rhel_7_6[] = { ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "hda-audio", "use-timer", "false" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "cirrus-vga", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "VGA", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "vmware-svga", "global-vmstate", "true" }, ++ /* 
hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "qxl-vga", "global-vmstate", "true" }, ++}; ++const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); ++ + GlobalProperty hw_compat_6_2[] = { + { "PIIX4_PM", "x-not-migrate-acpi-index", "on"}, + }; +diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c +index 46abbc5653..505467059b 100644 +--- a/hw/display/vga-isa.c ++++ b/hw/display/vga-isa.c +@@ -88,7 +88,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) + } + + static Property vga_isa_properties[] = { +- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8), ++ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index b72c03d0a6..c797e98312 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, + smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + pcms->smbios_entry_point_type); + } + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 1780f79bc1..b695f88c45 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) + smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + pcms->smbios_entry_point_type); + } + +diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c +index 6b65823b4b..75dacabc43 100644 +--- a/hw/net/rtl8139.c ++++ b/hw/net/rtl8139.c +@@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) + + static const VMStateDescription vmstate_rtl8139 = { + .name = "rtl8139", +- .version_id = 5, ++ .version_id = 4, + .minimum_version_id = 3, + .post_load = rtl8139_post_load, + .pre_save = 
rtl8139_pre_save, +@@ -3260,7 +3260,9 @@ static const VMStateDescription vmstate_rtl8139 = { + VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), ++#if 0 /* Disabled for Red Hat Enterprise Linux bz 1420195 */ + VMSTATE_UINT32_V(tally_counters.RxOkMul, RTL8139State, 5), ++#endif + VMSTATE_UINT16(tally_counters.TxAbt, RTL8139State), + VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 60349ee402..0edcc98434 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -57,6 +57,9 @@ static bool smbios_legacy = true; + static bool smbios_uuid_encoded = true; + /* end: legacy structures & constants for <= 2.0 machines */ + ++/* Set to true for modern Windows 10 HardwareID-6 compat */ ++static bool smbios_type2_required; ++ + + uint8_t *smbios_tables; + size_t smbios_tables_len; +@@ -639,7 +642,7 @@ static void smbios_build_type_1_table(void) + + static void smbios_build_type_2_table(void) + { +- SMBIOS_BUILD_TABLE_PRE(2, T2_BASE, false); /* optional */ ++ SMBIOS_BUILD_TABLE_PRE(2, T2_BASE, smbios_type2_required); + + SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); + SMBIOS_TABLE_SET_STR(2, product_str, type2.product); +@@ -914,7 +917,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type) ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type) + { + smbios_have_defaults = true; + smbios_legacy = legacy_mode; +@@ -935,11 +941,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + g_free(smbios_entries); + } + ++ /* ++ * If @stream_product & @stream_version are non-NULL, then ++ * we're following rules for new Windows 
driver support. ++ * The data we have to report is defined in this doc: ++ * ++ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer ++ * ++ * The Windows drivers are written to expect use of the ++ * scheme documented as "HardwareID-6" against Windows 10, ++ * which uses SMBIOS System (Type 1) and Base Board (Type 2) ++ * tables and will match on ++ * ++ * System Manufacturer = Red Hat (@manufacturer) ++ * System SKU Number = 8.2.0 (@stream_version) ++ * Baseboard Manufacturer = Red Hat (@manufacturer) ++ * Baseboard Product = RHEL-AV (@stream_product) ++ * ++ * NB, SKU must be changed with each RHEL-AV release ++ * ++ * Other fields can be freely used by applications using ++ * QEMU. For example apps can use the "System product" ++ * and "System version" to identify themselves. ++ * ++ * We get 'System Manufacturer' and 'Baseboard Manufacturer' ++ */ + SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type1.product, product); + SMBIOS_SET_DEFAULT(type1.version, version); ++ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); ++ if (stream_version != NULL) { ++ SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ } + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); +- SMBIOS_SET_DEFAULT(type2.product, product); ++ if (stream_product != NULL) { ++ SMBIOS_SET_DEFAULT(type2.product, stream_product); ++ smbios_type2_required = true; ++ } else { ++ SMBIOS_SET_DEFAULT(type2.product, product); ++ } + SMBIOS_SET_DEFAULT(type2.version, version); + SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type3.version, version); +diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c +index 050875b497..32935da46c 100644 +--- a/hw/timer/i8254_common.c ++++ b/hw/timer/i8254_common.c +@@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { + .pre_save = pit_dispatch_pre_save, + .post_load = pit_dispatch_post_load, + .fields = (VMStateField[]) 
{ +- VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), ++ VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ + VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, + vmstate_pit_channel, PITChannelState), + VMSTATE_INT64(channels[0].next_transition_time, +diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c +index e934b1a5b1..e18b05e528 100644 +--- a/hw/usb/hcd-xhci-pci.c ++++ b/hw/usb/hcd-xhci-pci.c +@@ -104,6 +104,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) + return 0; + } + ++/* RH bz 1912846 */ ++static bool usb_xhci_pci_add_msi(struct PCIDevice *dev, Error **errp) ++{ ++ int ret; ++ Error *err = NULL; ++ XHCIPciState *s = XHCI_PCI(dev); ++ ++ ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); ++ /* ++ * Any error other than -ENOTSUP(board's MSI support is broken) ++ * is a programming error ++ */ ++ assert(!ret || ret == -ENOTSUP); ++ if (ret && s->msi == ON_OFF_AUTO_ON) { ++ /* Can't satisfy user's explicit msi=on request, fail */ ++ error_append_hint(&err, "You have to use msi=auto (default) or " ++ "msi=off with this machine type.\n"); ++ error_propagate(errp, err); ++ return true; ++ } ++ assert(!err || s->msi == ON_OFF_AUTO_AUTO); ++ /* With msi=auto, we fall back to MSI off silently */ ++ error_free(err); ++ ++ return false; ++} ++ + static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + { + int ret; +@@ -125,23 +152,12 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + s->xhci.nec_quirks = true; + } + +- if (s->msi != ON_OFF_AUTO_OFF) { +- ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); +- /* +- * Any error other than -ENOTSUP(board's MSI support is broken) +- * is a programming error +- */ +- assert(!ret || ret == -ENOTSUP); +- if (ret && s->msi == ON_OFF_AUTO_ON) { +- /* Can't satisfy user's explicit msi=on request, fail */ +- error_append_hint(&err, "You have to use msi=auto (default) or " 
+- "msi=off with this machine type.\n"); ++ if (s->msi != ON_OFF_AUTO_OFF && s->rh_late_msi_cap) { ++ /* This gives the behaviour from 5.2.0 onwards, lspci shows 90,a0,70 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { + error_propagate(errp, err); + return; + } +- assert(!err || s->msi == ON_OFF_AUTO_AUTO); +- /* With msi=auto, we fall back to MSI off silently */ +- error_free(err); + } + pci_register_bar(dev, 0, + PCI_BASE_ADDRESS_SPACE_MEMORY | +@@ -154,6 +170,14 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + assert(ret > 0); + } + ++ /* RH bz 1912846 */ ++ if (s->msi != ON_OFF_AUTO_OFF && !s->rh_late_msi_cap) { ++ /* This gives the older RH machine behaviour, lspci shows 90,70,a0 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { ++ error_propagate(errp, err); ++ return; ++ } ++ } + if (s->msix != ON_OFF_AUTO_OFF) { + /* TODO check for errors, and should fail when msix=on */ + msix_init(dev, s->xhci.numintrs, +@@ -198,11 +222,18 @@ static void xhci_instance_init(Object *obj) + qdev_alias_all_properties(DEVICE(&s->xhci), obj); + } + ++static Property xhci_pci_properties[] = { ++ /* RH bz 1912846 */ ++ DEFINE_PROP_BOOL("x-rh-late-msi-cap", XHCIPciState, rh_late_msi_cap, true), ++ DEFINE_PROP_END_OF_LIST() ++}; ++ + static void xhci_class_init(ObjectClass *klass, void *data) + { + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + ++ device_class_set_props(dc, xhci_pci_properties); + dc->reset = xhci_pci_reset; + dc->vmsd = &vmstate_xhci_pci; + set_bit(DEVICE_CATEGORY_USB, dc->categories); +diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h +index c193f79443..086a1feb1e 100644 +--- a/hw/usb/hcd-xhci-pci.h ++++ b/hw/usb/hcd-xhci-pci.h +@@ -39,6 +39,7 @@ typedef struct XHCIPciState { + XHCIState xhci; + OnOffAuto msi; + OnOffAuto msix; ++ bool rh_late_msi_cap; /* bz 1912846 */ + } XHCIPciState; + + #endif +diff --git a/include/hw/boards.h b/include/hw/boards.h +index c92ac8815c..c90a19b4d1 100644 +--- 
a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -449,4 +449,25 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_5[]; ++extern const size_t hw_compat_rhel_8_5_len; ++ ++extern GlobalProperty hw_compat_rhel_8_4[]; ++extern const size_t hw_compat_rhel_8_4_len; ++ ++extern GlobalProperty hw_compat_rhel_8_3[]; ++extern const size_t hw_compat_rhel_8_3_len; ++ ++extern GlobalProperty hw_compat_rhel_8_2[]; ++extern const size_t hw_compat_rhel_8_2_len; ++ ++extern GlobalProperty hw_compat_rhel_8_1[]; ++extern const size_t hw_compat_rhel_8_1_len; ++ ++extern GlobalProperty hw_compat_rhel_8_0[]; ++extern const size_t hw_compat_rhel_8_0_len; ++ ++extern GlobalProperty hw_compat_rhel_7_6[]; ++extern const size_t hw_compat_rhel_7_6_len; ++ + #endif +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 4b7ad77a44..9acff96a86 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -272,7 +272,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type); ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type); + uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 1a27de9c8b..91331059d9 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -113,6 +113,9 @@ struct PCMachineClass { + bool smbios_defaults; + bool smbios_legacy_mode; + bool smbios_uuid_encoded; ++ /* New fields needed for Windows HardwareID-6 matching */ ++ const char 
*smbios_stream_product; ++ const char *smbios_stream_version; + + /* RAM / address space compat: */ + bool gigabyte_align; +-- +2.31.1 + diff --git a/0007-Add-aarch64-machine-types.patch b/0007-Add-aarch64-machine-types.patch new file mode 100644 index 0000000..3c44b11 --- /dev/null +++ b/0007-Add-aarch64-machine-types.patch @@ -0,0 +1,352 @@ +From 697aaa43e3c0f20fc312f06be6c1093f1ba907e1 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 12:53:31 +0200 +Subject: Add aarch64 machine types + +Adding changes to add RHEL machine types for aarch64 architecture. + +Signed-off-by: Miroslav Rezanina +--- +Rebase notes (6.1.0): +- Use CONFIG_TPM check when using TPM structures +- Add support for default_bus_bypass_iommu +- ea4c0b32d9 arm/virt: Register highmem and gic-version as class properties +- 895e1fa86a hw/arm/virt: Add 8.5 and 9.0 machine types and remove older ones + +Rebase notes (7.0.0): +- Added dtb-kaslr-seed option +- Set no_tcg_lpa2 to true + +Merged patches (6.2.0): +- 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type +- f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type + +Merged patches (7.0.0): +- 3b82be3dd3 redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU 6.2.0 update +- c354a86c9b hw/arm/virt: Register "iommu" as a class property +- c1a2630dc9 hw/arm/virt: Register "its" as a class property +- 9d8c61dc93 hw/arm/virt: Rename default_bus_bypass_iommu +- a1d1b6eeb6 hw/arm/virt: Expose the 'RAS' option +- 47f8fe1b82 hw/arm/virt: Add 9.0 machine type and remove 8.5 one +- ed2346788f hw/arm/virt: Check no_tcg_its and minor style changes +--- + hw/arm/virt.c | 234 +++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 8 ++ + 2 files changed, 241 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 6a84031fd7..e06862d22a 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -80,6 +80,7 @@ + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" + ++#if 0 /* 
Disabled for Red Hat Enterprise Linux */ + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ + void *data) \ +@@ -106,7 +107,48 @@ + DEFINE_VIRT_MACHINE_LATEST(major, minor, true) + #define DEFINE_VIRT_MACHINE(major, minor) \ + DEFINE_VIRT_MACHINE_LATEST(major, minor, false) +- ++#endif /* disabled for RHEL */ ++ ++#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ ++ static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ ++ void *data) \ ++ { \ ++ MachineClass *mc = MACHINE_CLASS(oc); \ ++ rhel##m##n##s##_virt_options(mc); \ ++ mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \ ++ if (latest) { \ ++ mc->alias = "virt"; \ ++ mc->is_default = 1; \ ++ } \ ++ } \ ++ static const TypeInfo rhel##m##n##s##_machvirt_info = { \ ++ .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." # s), \ ++ .parent = TYPE_RHEL_MACHINE, \ ++ .class_init = rhel##m##n##s##_virt_class_init, \ ++ }; \ ++ static void rhel##m##n##s##_machvirt_init(void) \ ++ { \ ++ type_register_static(&rhel##m##n##s##_machvirt_info); \ ++ } \ ++ type_init(rhel##m##n##s##_machvirt_init); ++ ++#define DEFINE_RHEL_MACHINE_AS_LATEST(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true) ++#define DEFINE_RHEL_MACHINE(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) ++ ++/* This variable is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. 
++ */ ++GlobalProperty arm_rhel_compat[] = { ++ { ++ .driver = "virtio-net-pci", ++ .property = "romfile", ++ .value = "", ++ }, ++}; ++const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); + + /* Number of external interrupt lines to configure the GIC with */ + #define NUM_IRQS 256 +@@ -2250,6 +2292,7 @@ static void machvirt_init(MachineState *machine) + qemu_add_machine_init_done_notifier(&vms->machine_done); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_secure(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2277,6 +2320,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) + + vms->virt = value; + } ++#endif /* disabled for RHEL */ + + static bool virt_get_highmem(Object *obj, Error **errp) + { +@@ -2402,6 +2446,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) + vms->ras = value; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_mte(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2415,6 +2460,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) + + vms->mte = value; + } ++#endif /* disabled for RHEL */ + + static char *virt_get_gic_version(Object *obj, Error **errp) + { +@@ -2818,6 +2864,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + return fixed_ipa ? 
0 : requested_pa_size; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void virt_machine_class_init(ObjectClass *oc, void *data) + { + MachineClass *mc = MACHINE_CLASS(oc); +@@ -3206,3 +3253,188 @@ static void virt_machine_2_6_options(MachineClass *mc) + vmc->no_pmu = true; + } + DEFINE_VIRT_MACHINE(2, 6) ++#endif /* disabled for RHEL */ ++ ++static void rhel_machine_class_init(ObjectClass *oc, void *data) ++{ ++ MachineClass *mc = MACHINE_CLASS(oc); ++ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); ++ ++ mc->family = "virt-rhel-Z"; ++ mc->init = machvirt_init; ++ /* Maximum supported VCPU count for all virt-rhel* machines */ ++ mc->max_cpus = 384; ++#ifdef CONFIG_TPM ++ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); ++#endif ++ mc->block_default_type = IF_VIRTIO; ++ mc->no_cdrom = 1; ++ mc->pci_allow_0_address = true; ++ /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */ ++ mc->minimum_page_bits = 12; ++ mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; ++ mc->cpu_index_to_instance_props = virt_cpu_index_to_props; ++ mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a57"); ++ mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; ++ mc->kvm_type = virt_kvm_type; ++ assert(!mc->get_hotplug_handler); ++ mc->get_hotplug_handler = virt_machine_get_hotplug_handler; ++ hc->pre_plug = virt_machine_device_pre_plug_cb; ++ hc->plug = virt_machine_device_plug_cb; ++ hc->unplug_request = virt_machine_device_unplug_request_cb; ++ hc->unplug = virt_machine_device_unplug_cb; ++ mc->nvdimm_supported = true; ++ mc->auto_enable_numa_with_memhp = true; ++ mc->auto_enable_numa_with_memdev = true; ++ mc->default_ram_id = "mach-virt.ram"; ++ ++ object_class_property_add(oc, "acpi", "OnOffAuto", ++ virt_get_acpi, virt_set_acpi, ++ NULL, NULL); ++ object_class_property_set_description(oc, "acpi", ++ "Enable ACPI"); ++ ++ object_class_property_add_bool(oc, "highmem", virt_get_highmem, ++ virt_set_highmem); ++ 
object_class_property_set_description(oc, "highmem", ++ "Set on/off to enable/disable using " ++ "physical address space above 32 bits"); ++ ++ object_class_property_add_str(oc, "gic-version", virt_get_gic_version, ++ virt_set_gic_version); ++ object_class_property_set_description(oc, "gic-version", ++ "Set GIC version. " ++ "Valid values are 2, 3, host and max"); ++ ++ object_class_property_add_str(oc, "iommu", virt_get_iommu, virt_set_iommu); ++ object_class_property_set_description(oc, "iommu", ++ "Set the IOMMU type. " ++ "Valid values are none and smmuv3"); ++ ++ object_class_property_add_bool(oc, "default-bus-bypass-iommu", ++ virt_get_default_bus_bypass_iommu, ++ virt_set_default_bus_bypass_iommu); ++ object_class_property_set_description(oc, "default-bus-bypass-iommu", ++ "Set on/off to enable/disable " ++ "bypass_iommu for default root bus"); ++ ++ object_class_property_add_bool(oc, "ras", virt_get_ras, ++ virt_set_ras); ++ object_class_property_set_description(oc, "ras", ++ "Set on/off to enable/disable reporting host memory errors " ++ "to a KVM guest using ACPI and guest external abort exceptions"); ++ ++ object_class_property_add_bool(oc, "its", virt_get_its, ++ virt_set_its); ++ object_class_property_set_description(oc, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation"); ++ ++ object_class_property_add_str(oc, "x-oem-id", ++ virt_get_oem_id, ++ virt_set_oem_id); ++ object_class_property_set_description(oc, "x-oem-id", ++ "Override the default value of field OEMID " ++ "in ACPI table header." ++ "The string may be up to 6 bytes in size"); ++ ++ ++ object_class_property_add_str(oc, "x-oem-table-id", ++ virt_get_oem_table_id, ++ virt_set_oem_table_id); ++ object_class_property_set_description(oc, "x-oem-table-id", ++ "Override the default value of field OEM Table ID " ++ "in ACPI table header." 
++ "The string may be up to 8 bytes in size"); ++ ++ object_class_property_add_bool(oc, "dtb-kaslr-seed", ++ virt_get_dtb_kaslr_seed, ++ virt_set_dtb_kaslr_seed); ++ object_class_property_set_description(oc, "dtb-kaslr-seed", ++ "Set off to disable passing of kaslr-seed " ++ "dtb node to guest"); ++} ++ ++static void rhel_virt_instance_init(Object *obj) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ ++ /* EL3 is disabled by default and non-configurable for RHEL */ ++ vms->secure = false; ++ ++ /* EL2 is disabled by default and non-configurable for RHEL */ ++ vms->virt = false; ++ ++ /* High memory is enabled by default */ ++ vms->highmem = true; ++ vms->gic_version = VIRT_GIC_VERSION_NOSEL; ++ ++ vms->highmem_ecam = !vmc->no_highmem_ecam; ++ ++ if (vmc->no_its) { ++ vms->its = false; ++ } else { ++ /* Default allows ITS instantiation */ ++ vms->its = true; ++ ++ if (vmc->no_tcg_its) { ++ vms->tcg_its = false; ++ } else { ++ vms->tcg_its = true; ++ } ++ } ++ ++ /* Default disallows iommu instantiation */ ++ vms->iommu = VIRT_IOMMU_NONE; ++ ++ /* The default root bus is attached to iommu by default */ ++ vms->default_bus_bypass_iommu = false; ++ ++ /* Default disallows RAS instantiation and is non-configurable for RHEL */ ++ vms->ras = false; ++ ++ /* MTE is disabled by default and non-configurable for RHEL */ ++ vms->mte = false; ++ ++ /* Supply a kaslr-seed by default */ ++ vms->dtb_kaslr_seed = true; ++ ++ vms->irqmap = a15irqmap; ++ ++ virt_flash_create(vms); ++ ++ vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); ++ vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); ++} ++ ++static const TypeInfo rhel_machine_info = { ++ .name = TYPE_RHEL_MACHINE, ++ .parent = TYPE_MACHINE, ++ .abstract = true, ++ .instance_size = sizeof(VirtMachineState), ++ .class_size = sizeof(VirtMachineClass), ++ .class_init = rhel_machine_class_init, ++ .instance_init = rhel_virt_instance_init, ++ .interfaces = 
(InterfaceInfo[]) { ++ { TYPE_HOTPLUG_HANDLER }, ++ { } ++ }, ++}; ++ ++static void rhel_machine_init(void) ++{ ++ type_register_static(&rhel_machine_info); ++} ++type_init(rhel_machine_init); ++ ++static void rhel900_virt_options(MachineClass *mc) ++{ ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ ++ /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ ++ vmc->no_tcg_lpa2 = true; ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 7e76ee2619..9b1efe8f0e 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -179,9 +179,17 @@ struct VirtMachineState { + + #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) + ++#if 0 /* disabled for Red Hat Enterprise Linux */ + #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") + OBJECT_DECLARE_TYPE(VirtMachineState, VirtMachineClass, VIRT_MACHINE) + ++#else ++#define TYPE_RHEL_MACHINE MACHINE_TYPE_NAME("virt-rhel") ++typedef struct VirtMachineClass VirtMachineClass; ++typedef struct VirtMachineState VirtMachineState; ++DECLARE_OBJ_CHECKERS(VirtMachineState, VirtMachineClass, VIRT_MACHINE, TYPE_RHEL_MACHINE) ++#endif ++ + void virt_acpi_setup(VirtMachineState *vms); + bool virt_is_acpi_enabled(VirtMachineState *vms); + +-- +2.31.1 + diff --git a/0007-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch new file mode 100644 index 0000000..f7bd665 --- /dev/null +++ b/0007-Machine-type-related-general-changes.patch @@ -0,0 +1,1071 @@ +From adca046d9db670637b9bf2b24f7a4349a9fe2628 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 11 Jan 2019 09:54:45 +0100 +Subject: Machine type related general changes + +This patch is first part of original "Add RHEL machine types" patch we +split to allow easier review. It contains changes not related to any +architecture. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- Remove e1000 device duplication changes to reflect upstream solution +- Rewrite machine compat properties to upstream solution + +Rebase changes (4.1.0): +- Removed optional flag for machine compat properties (upstream) +- Remove c3e002cb chunk from hw/net/e1000.c +- Reorder compat structures +- Use one format for compat structures +- Added compat for virtio-balloon-pci.any_layout for rhel71 + +Rebase changes (weekly-210303): +- Added rhel 8.4.0 compat based on 5.2 compat + +Rebase changes (weekly-211103): +- Do not duplicate minimum_version_id for piix4_pm + +Merged patches (4.0.0): +- d4c0957 compat: Generic HW_COMPAT_RHEL7_6 +- cbac773 virtio: Make disable-legacy/disable-modern compat properties optional + +Merged patches (4.1.0): +- 479ad30 redhat: fix cut'n'paste garbage in hw_compat comments +- f19738e compat: Generic hw_compat_rhel_8_0 + +Merged patches (4.2.0): +- 9f2bfaa machine types: Update hw_compat_rhel_8_0 from hw_compat_4_0 +- ca4a5e8 virtio: Make disable-legacy/disable-modern compat properties optional +- compat: Generic hw_compat_rhel_8_1 (patch 93040/92956) + +Merged patches (5.1.0): +- e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partially) +- 8f9f4d8 compat: disable 'edid' for virtio-gpu-ccw + +Merged patches (5.2.0 rc0): +- 8348642 redhat: define hw_compat_8_2 +- 45b8402 redhat: define hw_compat_8_2 +- 4effa71 redhat: Update hw_compat_8_2 +- 0e84dff virtio: skip legacy support check on machine types less than 5.1 (partially) + +Merged patches (6.0.0): +- fa0063ba67 redhat: Define hw_compat_8_3 +- d98e328c8d usb/hcd-xhci-pci: Fixup capabilities ordering (again) +- b8a2578117 virtio: move 'use-disabled-flag' property to hw_compat_4_2 +- f7940b04c8 virtio-pci: compat page aligned ATS + +Merged patches (weekly-210602): +- 26f25108c1 redhat: add missing entries in hw_compat_rhel_8_4 + +Merged patches (weekly-211006): +- 43c4b9bea6 redhat: Define 
hw_compat_rhel_8_5 +--- + hw/acpi/ich9.c | 15 ++ + hw/acpi/piix4.c | 6 +- + hw/arm/virt.c | 2 +- + hw/char/serial.c | 16 +++ + hw/core/machine.c | 272 +++++++++++++++++++++++++++++++++++ + hw/display/vga-isa.c | 2 +- + hw/i386/pc_piix.c | 2 + + hw/i386/pc_q35.c | 2 + + hw/net/e1000e.c | 22 +++ + hw/net/rtl8139.c | 4 +- + hw/rtc/mc146818rtc.c | 6 + + hw/smbios/smbios.c | 46 +++++- + hw/timer/i8254_common.c | 2 +- + hw/usb/hcd-uhci.c | 4 +- + hw/usb/hcd-xhci-pci.c | 59 ++++++-- + hw/usb/hcd-xhci-pci.h | 1 + + hw/usb/hcd-xhci.c | 20 +++ + hw/usb/hcd-xhci.h | 2 + + include/hw/acpi/ich9.h | 3 + + include/hw/boards.h | 36 +++++ + include/hw/firmware/smbios.h | 5 +- + include/hw/i386/pc.h | 3 + + include/hw/usb.h | 3 + + migration/migration.c | 2 + + migration/migration.h | 5 + + 25 files changed, 514 insertions(+), 26 deletions(-) + +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index 381ef2ddcf..82bd805b55 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -433,6 +433,18 @@ static void ich9_pm_set_keep_pci_slot_hpc(Object *obj, bool value, Error **errp) + s->pm.keep_pci_slot_hpc = value; + } + ++static bool ich9_pm_get_force_rev1_fadt(Object *obj, Error **errp) ++{ ++ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); ++ return s->pm.force_rev1_fadt; ++} ++ ++static void ich9_pm_set_force_rev1_fadt(Object *obj, bool value, Error **errp) ++{ ++ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); ++ s->pm.force_rev1_fadt = value; ++} ++ + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + { + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; +@@ -457,6 +469,9 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + object_property_add_bool(obj, "cpu-hotplug-legacy", + ich9_pm_get_cpu_hotplug_legacy, + ich9_pm_set_cpu_hotplug_legacy); ++ object_property_add_bool(obj, "__com.redhat_force-rev1-fadt", ++ ich9_pm_get_force_rev1_fadt, ++ ich9_pm_set_force_rev1_fadt); + object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S3_DISABLED, + &pm->disable_s3, OBJ_PROP_FLAG_READWRITE); + 
object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, +diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c +index f0b5fac44a..8d6011c0a3 100644 +--- a/hw/acpi/piix4.c ++++ b/hw/acpi/piix4.c +@@ -278,7 +278,7 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id) + static const VMStateDescription vmstate_acpi = { + .name = "piix4_pm", + .version_id = 3, +- .minimum_version_id = 3, ++ .minimum_version_id = 2, + .post_load = vmstate_acpi_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), +@@ -644,8 +644,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + + static Property piix4_pm_properties[] = { + DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), + DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, + use_acpi_hotplug_bridge, true), +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 30da05dfe0..5de4d9d73b 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1590,7 +1590,7 @@ static void virt_build_smbios(VirtMachineState *vms) + + smbios_set_defaults("QEMU", product, + vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, +- true, SMBIOS_ENTRY_POINT_30); ++ true, NULL, NULL, SMBIOS_ENTRY_POINT_30); + + smbios_get_tables(MACHINE(vms), NULL, 0, + &smbios_tables, &smbios_tables_len, +diff --git a/hw/char/serial.c b/hw/char/serial.c +index 7061aacbce..fe8d0afbb0 100644 +--- a/hw/char/serial.c ++++ b/hw/char/serial.c +@@ -37,6 +37,7 @@ + #include "trace.h" + #include "hw/qdev-properties.h" + #include "hw/qdev-properties-system.h" ++#include "migration/migration.h" + + #define UART_LCR_DLAB 0x80 /* Divisor latch access bit */ + +@@ -689,6 +690,9 @@ static int serial_post_load(void *opaque, int version_id) + static bool serial_thr_ipending_needed(void *opaque) + { + SerialState *s = opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } + + if (s->ier & UART_IER_THRI) { + bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); +@@ -770,6 +774,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { + static bool serial_fifo_timeout_timer_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return timer_pending(s->fifo_timeout_timer); + } + +@@ -787,6 +795,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { + static bool serial_timeout_ipending_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->timeout_ipending != 0; + } + +@@ -804,6 +816,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { + static bool serial_poll_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->poll_msl >= 0; + } + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 53a99abc56..be4f9864cd 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,278 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" + ++/* ++ * Mostly the same as hw_compat_6_0 ++ */ ++GlobalProperty 
hw_compat_rhel_8_5[] = { ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "gpex-pcihost", "allow-unmapped-accesses", "false" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "i8042", "extended-state", "false"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "nvme-ns", "eui64-default", "off"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000e", "init-vet", "off" }, ++}; ++const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); ++ ++/* ++ * Mostly the same as hw_compat_5_2 ++ */ ++GlobalProperty hw_compat_rhel_8_4[] = { ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "ICH9-LPC", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "PIIX4_PM", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "virtio-blk-device", "report-discard-granularity", "off" }, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "virtio-net-pci", "vectors", "3"}, ++}; ++const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); ++ ++/* ++ * Mostly the same as hw_compat_5_1 ++ */ ++GlobalProperty hw_compat_rhel_8_3[] = { ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-blk", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-blk-device", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-scsi-device", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "nvme", "use-intel-id", "on"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++ /* hw_compat_rhel_8_3 bz 1912846 */ ++ { "pci-xhci", "x-rh-late-msi-cap", "off" }, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-pci", 
"x-ats-page-aligned", "off"}, ++}; ++const size_t hw_compat_rhel_8_3_len = G_N_ELEMENTS(hw_compat_rhel_8_3); ++ ++/* ++ * The same as hw_compat_4_2 + hw_compat_5_0 ++ */ ++GlobalProperty hw_compat_rhel_8_2[] = { ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "queue-size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "virtqueue_size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "x-enable-wce-if-config-wce", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "seg-max-adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "vhost-blk-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "usb-host", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "usb-redir", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl-vga", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "fw_cfg", "acpi-mr-restore", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-device", "use-disabled-flag", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "pci-host-bridge", "x-config-reg-migration-enabled", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "virtio-balloon-device", "page-poison", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-read-set-eax", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-signal-unsupported-cmd", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-report-vmx-type", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-cmds-v2", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { 
"virtio-device", "x-disable-legacy-check", "true" }, ++}; ++const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); ++ ++/* ++ * The same as hw_compat_4_1 ++ */ ++GlobalProperty hw_compat_rhel_8_1[] = { ++ /* hw_compat_rhel_8_1 from hw_compat_4_1 */ ++ { "virtio-pci", "x-pcie-flr-init", "off" }, ++}; ++const size_t hw_compat_rhel_8_1_len = G_N_ELEMENTS(hw_compat_rhel_8_1); ++ ++/* The same as hw_compat_3_1 ++ * format of array has been changed by: ++ * 6c36bddf5340 ("machine: Use shorter format for GlobalProperty arrays") ++ */ ++GlobalProperty hw_compat_rhel_8_0[] = { ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-speed", "2_5" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-width", "1" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-crb", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-tis", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-kbd", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-mouse", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-tablet", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "discard", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "write-zeroes", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "VGA", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "secondary-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "bochs-display", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from 
hw_compat_4_0 */ ++ { "virtio-gpu-device", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-device", "use-started", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ ++ { "pcie-root-port-base", "disable-acs", "true" }, ++}; ++const size_t hw_compat_rhel_8_0_len = G_N_ELEMENTS(hw_compat_rhel_8_0); ++ ++/* The same as hw_compat_3_0 + hw_compat_2_12 ++ * except that ++ * there's nothing in 3_0 ++ * migration.decompress-error-check=off was in 7.5 from bz 1584139 ++ */ ++GlobalProperty hw_compat_rhel_7_6[] = { ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "hda-audio", "use-timer", "false" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "cirrus-vga", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "VGA", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "vmware-svga", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "qxl-vga", "global-vmstate", "true" }, ++}; ++const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); ++ ++/* The same as hw_compat_2_11 + hw_compat_2_10 */ ++GlobalProperty hw_compat_rhel_7_5[] = { ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "hpet", "hpet-offset-saved", "false" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "virtio-blk-pci", "vectors", "2" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "vhost-user-blk-pci", "vectors", "2" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 ++ bz 1608778 modified for our naming */ ++ { "e1000-82540em", "migrate_tso_props", "off" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_10 */ ++ { "virtio-mouse-device", "wheel-axis", "false" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_10 */ ++ { "virtio-tablet-device", "wheel-axis", "false" }, ++ { "cirrus-vga", "vgamem_mb", "16" }, ++ { "migration", "decompress-error-check", "off" }, ++}; ++const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); 
++ ++/* Mostly like hw_compat_2_9 except ++ * x-mtu-bypass-backend, x-migrate-msix has already been ++ * backported to RHEL7.4. shpc was already on in 7.4. ++ */ ++GlobalProperty hw_compat_rhel_7_4[] = { ++ { "intel-iommu", "pt", "off" }, ++}; ++ ++const size_t hw_compat_rhel_7_4_len = G_N_ELEMENTS(hw_compat_rhel_7_4); ++/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except ++ * disable-modern, disable-legacy, page-per-vq have already been ++ * backported to RHEL7.3 ++ */ ++GlobalProperty hw_compat_rhel_7_3[] = { ++ { "virtio-mmio", "format_transport_address", "off" }, ++ { "virtio-serial-device", "emergency-write", "off" }, ++ { "ioapic", "version", "0x11" }, ++ { "intel-iommu", "x-buggy-eim", "true" }, ++ { "virtio-pci", "x-ignore-backend-features", "on" }, ++ { "fw_cfg_mem", "x-file-slots", stringify(0x10) }, ++ { "fw_cfg_io", "x-file-slots", stringify(0x10) }, ++ { "pflash_cfi01", "old-multiple-chip-handling", "on" }, ++ { TYPE_PCI_DEVICE, "x-pcie-extcap-init", "off" }, ++ { "virtio-pci", "x-pcie-deverr-init", "off" }, ++ { "virtio-pci", "x-pcie-lnkctl-init", "off" }, ++ { "virtio-pci", "x-pcie-pm-init", "off" }, ++ { "virtio-net-device", "x-mtu-bypass-backend", "off" }, ++ { "e1000e", "__redhat_e1000e_7_3_intr_state", "on" }, ++}; ++const size_t hw_compat_rhel_7_3_len = G_N_ELEMENTS(hw_compat_rhel_7_3); ++ ++/* Mostly like hw_compat_2_4 + 2_3 but: ++ * we don't need "any_layout" as it has been backported to 7.2 ++ */ ++GlobalProperty hw_compat_rhel_7_2[] = { ++ { "virtio-blk-device", "scsi", "true" }, ++ { "e1000-82540em", "extra_mac_registers", "off" }, ++ { "virtio-pci", "x-disable-pcie", "on" }, ++ { "virtio-pci", "migrate-extra", "off" }, ++ { "fw_cfg_mem", "dma_enabled", "off" }, ++ { "fw_cfg_io", "dma_enabled", "off" }, ++ { "isa-fdc", "fallback", "144" }, ++ /* Optional because not all virtio-pci devices support legacy mode */ ++ { "virtio-pci", "disable-modern", "on", .optional = true }, ++ { "virtio-pci", "disable-legacy", "off", 
.optional = true }, ++ { TYPE_PCI_DEVICE, "x-pcie-lnksta-dllla", "off" }, ++ { "virtio-pci", "page-per-vq", "on" }, ++ /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ ++ { "migration", "send-section-footer", "off" }, ++ /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ ++ { "migration", "store-global-state", "off", ++ }, ++}; ++const size_t hw_compat_rhel_7_2_len = G_N_ELEMENTS(hw_compat_rhel_7_2); ++ ++/* Mostly like hw_compat_2_1 but: ++ * we don't need virtio-scsi-pci since 7.0 already had that on ++ * ++ * RH: Note, qemu-extended-regs should have been enabled in the 7.1 ++ * machine type, but was accidentally turned off in 7.2 onwards. ++ */ ++GlobalProperty hw_compat_rhel_7_1[] = { ++ { "intel-hda-generic", "old_msi_addr", "on" }, ++ { "VGA", "qemu-extended-regs", "off" }, ++ { "secondary-vga", "qemu-extended-regs", "off" }, ++ { "usb-mouse", "usb_version", stringify(1) }, ++ { "usb-kbd", "usb_version", stringify(1) }, ++ { "virtio-pci", "virtio-pci-bus-master-bug-migration", "on" }, ++ { "virtio-blk-pci", "any_layout", "off" }, ++ { "virtio-balloon-pci", "any_layout", "off" }, ++ { "virtio-serial-pci", "any_layout", "off" }, ++ { "virtio-9p-pci", "any_layout", "off" }, ++ { "virtio-rng-pci", "any_layout", "off" }, ++ /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ ++ { "migration", "send-configuration", "off" }, ++}; ++const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); ++ + GlobalProperty hw_compat_6_1[] = { + { "vhost-user-vsock-device", "seqpacket", "off" }, + { "nvme-ns", "shared", "off" }, +diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c +index 90851e730b..a91c5d7467 100644 +--- a/hw/display/vga-isa.c ++++ b/hw/display/vga-isa.c +@@ -85,7 +85,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) + } + + static Property vga_isa_properties[] = { +- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8), ++ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16), + 
DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 223dd3e05d..dda3f64f19 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, + smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index e1e100316d..235054a643 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) + smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c +index ac96f7665a..d35bc1f0b0 100644 +--- a/hw/net/e1000e.c ++++ b/hw/net/e1000e.c +@@ -81,6 +81,12 @@ struct E1000EState { + + E1000ECore core; + bool init_vet; ++ ++ /* 7.3 had the intr_state field that was in the original e1000e code ++ * but that was removed prior to 2.7's release ++ */ ++ bool redhat_7_3_intr_state_enable; ++ uint32_t redhat_7_3_intr_state; + }; + + #define E1000E_MMIO_IDX 0 +@@ -96,6 +102,10 @@ struct E1000EState { + #define E1000E_MSIX_TABLE (0x0000) + #define E1000E_MSIX_PBA (0x2000) + ++/* Values as in RHEL 7.3 build and original upstream */ ++#define RH_E1000E_USE_MSI BIT(0) ++#define RH_E1000E_USE_MSIX BIT(1) ++ + static uint64_t + e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) + { +@@ -307,6 +317,8 @@ e1000e_init_msix(E1000EState *s) + } else { + if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { + msix_uninit(d, &s->msix, &s->msix); ++ } else { ++ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSIX; + } + } + } +@@ -478,6 +490,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) 
+ ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); + if (ret) { + trace_e1000e_msi_init_fail(ret); ++ } else { ++ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSI; + } + + if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, +@@ -605,6 +619,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { + VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ + e1000e_vmstate_intr_timer, E1000IntrDelayTimer) + ++static bool rhel_7_3_check(void *opaque, int version_id) ++{ ++ return ((E1000EState *)opaque)->redhat_7_3_intr_state_enable; ++} ++ + static const VMStateDescription e1000e_vmstate = { + .name = "e1000e", + .version_id = 1, +@@ -616,6 +635,7 @@ static const VMStateDescription e1000e_vmstate = { + VMSTATE_MSIX(parent_obj, E1000EState), + + VMSTATE_UINT32(ioaddr, E1000EState), ++ VMSTATE_UINT32_TEST(redhat_7_3_intr_state, E1000EState, rhel_7_3_check), + VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), + VMSTATE_UINT8(core.rx_desc_len, E1000EState), + VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, +@@ -664,6 +684,8 @@ static PropertyInfo e1000e_prop_disable_vnet, + + static Property e1000e_properties[] = { + DEFINE_NIC_PROPERTIES(E1000EState, conf), ++ DEFINE_PROP_BOOL("__redhat_e1000e_7_3_intr_state", E1000EState, ++ redhat_7_3_intr_state_enable, false), + DEFINE_PROP_SIGNED("disable_vnet_hdr", E1000EState, disable_vnet, false, + e1000e_prop_disable_vnet, bool), + DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, +diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c +index 90b4fc63ce..3ffb9dd22c 100644 +--- a/hw/net/rtl8139.c ++++ b/hw/net/rtl8139.c +@@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) + + static const VMStateDescription vmstate_rtl8139 = { + .name = "rtl8139", +- .version_id = 5, ++ .version_id = 4, + .minimum_version_id = 3, + .post_load = rtl8139_post_load, + .pre_save = rtl8139_pre_save, +@@ -3260,7 +3260,9 @@ static const VMStateDescription vmstate_rtl8139 = { + VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), 
+ VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), ++#if 0 /* Disabled for Red Hat Enterprise Linux bz 1420195 */ + VMSTATE_UINT32_V(tally_counters.RxOkMul, RTL8139State, 5), ++#endif + VMSTATE_UINT16(tally_counters.TxAbt, RTL8139State), + VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), + +diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c +index 4fbafddb22..2f120c6e70 100644 +--- a/hw/rtc/mc146818rtc.c ++++ b/hw/rtc/mc146818rtc.c +@@ -43,6 +43,7 @@ + #include "qapi/qapi-events-misc-target.h" + #include "qapi/visitor.h" + #include "hw/rtc/mc146818rtc_regs.h" ++#include "migration/migration.h" + + #ifdef TARGET_I386 + #include "qapi/qapi-commands-misc-target.h" +@@ -821,6 +822,11 @@ static int rtc_post_load(void *opaque, int version_id) + static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) + { + RTCState *s = (RTCState *)opaque; ++ ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->irq_reinject_on_ack_count != 0; + } + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 7397e56737..3a4bb894ba 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -57,6 +57,9 @@ static bool smbios_legacy = true; + static bool smbios_uuid_encoded = true; + /* end: legacy structures & constants for <= 2.0 machines */ + ++/* Set to true for modern Windows 10 HardwareID-6 compat */ ++static bool smbios_type2_required; ++ + + uint8_t *smbios_tables; + size_t smbios_tables_len; +@@ -619,7 +622,7 @@ static void smbios_build_type_1_table(void) + + static void smbios_build_type_2_table(void) + { +- SMBIOS_BUILD_TABLE_PRE(2, 0x200, false); /* optional */ ++ SMBIOS_BUILD_TABLE_PRE(2, 0x200, smbios_type2_required); + + SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); + SMBIOS_TABLE_SET_STR(2, product_str, type2.product); +@@ -888,7 +891,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + + void smbios_set_defaults(const char *manufacturer, const char 
*product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type) ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type) + { + smbios_have_defaults = true; + smbios_legacy = legacy_mode; +@@ -909,11 +915,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + g_free(smbios_entries); + } + ++ /* ++ * If @stream_product & @stream_version are non-NULL, then ++ * we're following rules for new Windows driver support. ++ * The data we have to report is defined in this doc: ++ * ++ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer ++ * ++ * The Windows drivers are written to expect use of the ++ * scheme documented as "HardwareID-6" against Windows 10, ++ * which uses SMBIOS System (Type 1) and Base Board (Type 2) ++ * tables and will match on ++ * ++ * System Manufacturer = Red Hat (@manufacturer) ++ * System SKU Number = 8.2.0 (@stream_version) ++ * Baseboard Manufacturer = Red Hat (@manufacturer) ++ * Baseboard Product = RHEL-AV (@stream_product) ++ * ++ * NB, SKU must be changed with each RHEL-AV release ++ * ++ * Other fields can be freely used by applications using ++ * QEMU. For example apps can use the "System product" ++ * and "System version" to identify themselves. 
++ * ++ * We get 'System Manufacturer' and 'Baseboard Manufacturer' ++ */ + SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type1.product, product); + SMBIOS_SET_DEFAULT(type1.version, version); ++ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); ++ if (stream_version != NULL) { ++ SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ } + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); +- SMBIOS_SET_DEFAULT(type2.product, product); ++ if (stream_product != NULL) { ++ SMBIOS_SET_DEFAULT(type2.product, stream_product); ++ smbios_type2_required = true; ++ } else { ++ SMBIOS_SET_DEFAULT(type2.product, product); ++ } + SMBIOS_SET_DEFAULT(type2.version, version); + SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type3.version, version); +diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c +index 050875b497..32935da46c 100644 +--- a/hw/timer/i8254_common.c ++++ b/hw/timer/i8254_common.c +@@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { + .pre_save = pit_dispatch_pre_save, + .post_load = pit_dispatch_post_load, + .fields = (VMStateField[]) { +- VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), ++ VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ + VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, + vmstate_pit_channel, PITChannelState), + VMSTATE_INT64(channels[0].next_transition_time, +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index d1b5657d72..7930b868fa 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -1166,11 +1166,13 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) + UHCIState *s = UHCI(dev); + uint8_t *pci_conf = s->dev.config; + int i; ++ int irq_pin; + + pci_conf[PCI_CLASS_PROG] = 0x00; + /* TODO: reset value should be 0. 
*/ + pci_conf[USB_SBRN] = USB_RELEASE_1; /* release number */ +- pci_config_set_interrupt_pin(pci_conf, u->info.irq_pin + 1); ++ irq_pin = u->info.irq_pin; ++ pci_config_set_interrupt_pin(pci_conf, irq_pin + 1); + s->irq = pci_allocate_irq(dev); + + if (s->masterbus) { +diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c +index e934b1a5b1..e18b05e528 100644 +--- a/hw/usb/hcd-xhci-pci.c ++++ b/hw/usb/hcd-xhci-pci.c +@@ -104,6 +104,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) + return 0; + } + ++/* RH bz 1912846 */ ++static bool usb_xhci_pci_add_msi(struct PCIDevice *dev, Error **errp) ++{ ++ int ret; ++ Error *err = NULL; ++ XHCIPciState *s = XHCI_PCI(dev); ++ ++ ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); ++ /* ++ * Any error other than -ENOTSUP(board's MSI support is broken) ++ * is a programming error ++ */ ++ assert(!ret || ret == -ENOTSUP); ++ if (ret && s->msi == ON_OFF_AUTO_ON) { ++ /* Can't satisfy user's explicit msi=on request, fail */ ++ error_append_hint(&err, "You have to use msi=auto (default) or " ++ "msi=off with this machine type.\n"); ++ error_propagate(errp, err); ++ return true; ++ } ++ assert(!err || s->msi == ON_OFF_AUTO_AUTO); ++ /* With msi=auto, we fall back to MSI off silently */ ++ error_free(err); ++ ++ return false; ++} ++ + static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + { + int ret; +@@ -125,23 +152,12 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + s->xhci.nec_quirks = true; + } + +- if (s->msi != ON_OFF_AUTO_OFF) { +- ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); +- /* +- * Any error other than -ENOTSUP(board's MSI support is broken) +- * is a programming error +- */ +- assert(!ret || ret == -ENOTSUP); +- if (ret && s->msi == ON_OFF_AUTO_ON) { +- /* Can't satisfy user's explicit msi=on request, fail */ +- error_append_hint(&err, "You have to use msi=auto (default) or " +- "msi=off with this machine type.\n"); 
++ if (s->msi != ON_OFF_AUTO_OFF && s->rh_late_msi_cap) { ++ /* This gives the behaviour from 5.2.0 onwards, lspci shows 90,a0,70 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { + error_propagate(errp, err); + return; + } +- assert(!err || s->msi == ON_OFF_AUTO_AUTO); +- /* With msi=auto, we fall back to MSI off silently */ +- error_free(err); + } + pci_register_bar(dev, 0, + PCI_BASE_ADDRESS_SPACE_MEMORY | +@@ -154,6 +170,14 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + assert(ret > 0); + } + ++ /* RH bz 1912846 */ ++ if (s->msi != ON_OFF_AUTO_OFF && !s->rh_late_msi_cap) { ++ /* This gives the older RH machine behaviour, lspci shows 90,70,a0 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { ++ error_propagate(errp, err); ++ return; ++ } ++ } + if (s->msix != ON_OFF_AUTO_OFF) { + /* TODO check for errors, and should fail when msix=on */ + msix_init(dev, s->xhci.numintrs, +@@ -198,11 +222,18 @@ static void xhci_instance_init(Object *obj) + qdev_alias_all_properties(DEVICE(&s->xhci), obj); + } + ++static Property xhci_pci_properties[] = { ++ /* RH bz 1912846 */ ++ DEFINE_PROP_BOOL("x-rh-late-msi-cap", XHCIPciState, rh_late_msi_cap, true), ++ DEFINE_PROP_END_OF_LIST() ++}; ++ + static void xhci_class_init(ObjectClass *klass, void *data) + { + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + ++ device_class_set_props(dc, xhci_pci_properties); + dc->reset = xhci_pci_reset; + dc->vmsd = &vmstate_xhci_pci; + set_bit(DEVICE_CATEGORY_USB, dc->categories); +diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h +index c193f79443..086a1feb1e 100644 +--- a/hw/usb/hcd-xhci-pci.h ++++ b/hw/usb/hcd-xhci-pci.h +@@ -39,6 +39,7 @@ typedef struct XHCIPciState { + XHCIState xhci; + OnOffAuto msi; + OnOffAuto msix; ++ bool rh_late_msi_cap; /* bz 1912846 */ + } XHCIPciState; + + #endif +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index e01700039b..d5ea13356c 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c 
+@@ -3494,9 +3494,27 @@ static const VMStateDescription vmstate_xhci_slot = { + } + }; + ++static int xhci_event_pre_save(void *opaque) ++{ ++ XHCIEvent *s = opaque; ++ ++ s->cve_2014_5263_a = ((uint8_t *)&s->type)[0]; ++ s->cve_2014_5263_b = ((uint8_t *)&s->type)[1]; ++ ++ return 0; ++} ++ ++bool migrate_cve_2014_5263_xhci_fields; ++ ++static bool xhci_event_cve_2014_5263(void *opaque, int version_id) ++{ ++ return migrate_cve_2014_5263_xhci_fields; ++} ++ + static const VMStateDescription vmstate_xhci_event = { + .name = "xhci-event", + .version_id = 1, ++ .pre_save = xhci_event_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT32(type, XHCIEvent), + VMSTATE_UINT32(ccode, XHCIEvent), +@@ -3505,6 +3523,8 @@ static const VMStateDescription vmstate_xhci_event = { + VMSTATE_UINT32(flags, XHCIEvent), + VMSTATE_UINT8(slotid, XHCIEvent), + VMSTATE_UINT8(epid, XHCIEvent), ++ VMSTATE_UINT8_TEST(cve_2014_5263_a, XHCIEvent, xhci_event_cve_2014_5263), ++ VMSTATE_UINT8_TEST(cve_2014_5263_b, XHCIEvent, xhci_event_cve_2014_5263), + VMSTATE_END_OF_LIST() + } + }; +diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h +index 98f598382a..50a7b6f6c4 100644 +--- a/hw/usb/hcd-xhci.h ++++ b/hw/usb/hcd-xhci.h +@@ -149,6 +149,8 @@ typedef struct XHCIEvent { + uint32_t flags; + uint8_t slotid; + uint8_t epid; ++ uint8_t cve_2014_5263_a; ++ uint8_t cve_2014_5263_b; + } XHCIEvent; + + typedef struct XHCIInterrupter { +diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h +index 7ca92843c6..21abfd8447 100644 +--- a/include/hw/acpi/ich9.h ++++ b/include/hw/acpi/ich9.h +@@ -68,6 +68,9 @@ typedef struct ICH9LPCPMRegs { + bool smm_compat; + bool enable_tco; + TCOIORegs tco_regs; ++ ++ /* RH addition, see bz 1489800 */ ++ bool force_rev1_fadt; + } ICH9LPCPMRegs; + + #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 9c1c190104..8bba96ef2b 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -441,4 +441,40 @@ extern 
const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_5[]; ++extern const size_t hw_compat_rhel_8_5_len; ++ ++extern GlobalProperty hw_compat_rhel_8_4[]; ++extern const size_t hw_compat_rhel_8_4_len; ++ ++extern GlobalProperty hw_compat_rhel_8_3[]; ++extern const size_t hw_compat_rhel_8_3_len; ++ ++extern GlobalProperty hw_compat_rhel_8_2[]; ++extern const size_t hw_compat_rhel_8_2_len; ++ ++extern GlobalProperty hw_compat_rhel_8_1[]; ++extern const size_t hw_compat_rhel_8_1_len; ++ ++extern GlobalProperty hw_compat_rhel_8_0[]; ++extern const size_t hw_compat_rhel_8_0_len; ++ ++extern GlobalProperty hw_compat_rhel_7_6[]; ++extern const size_t hw_compat_rhel_7_6_len; ++ ++extern GlobalProperty hw_compat_rhel_7_5[]; ++extern const size_t hw_compat_rhel_7_5_len; ++ ++extern GlobalProperty hw_compat_rhel_7_4[]; ++extern const size_t hw_compat_rhel_7_4_len; ++ ++extern GlobalProperty hw_compat_rhel_7_3[]; ++extern const size_t hw_compat_rhel_7_3_len; ++ ++extern GlobalProperty hw_compat_rhel_7_2[]; ++extern const size_t hw_compat_rhel_7_2_len; ++ ++extern GlobalProperty hw_compat_rhel_7_1[]; ++extern const size_t hw_compat_rhel_7_1_len; ++ + #endif +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 5a0dd0c8cf..2cb1ec2bab 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -278,7 +278,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type); ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type); + uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct 
smbios_phys_mem_area *mem_array, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 9ab39e428f..7ccc9a1a07 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -107,6 +107,9 @@ struct PCMachineClass { + bool smbios_defaults; + bool smbios_legacy_mode; + bool smbios_uuid_encoded; ++ /* New fields needed for Windows HardwareID-6 matching */ ++ const char *smbios_stream_product; ++ const char *smbios_stream_version; + + /* RAM / address space compat: */ + bool gigabyte_align; +diff --git a/include/hw/usb.h b/include/hw/usb.h +index 33668dd0a9..e6b2fe72da 100644 +--- a/include/hw/usb.h ++++ b/include/hw/usb.h +@@ -582,4 +582,7 @@ void usb_pcap_init(FILE *fp); + void usb_pcap_ctrl(USBPacket *p, bool setup); + void usb_pcap_data(USBPacket *p, bool setup); + ++/* hcd-xhci.c -- rhel7.0.0 machine type compatibility */ ++extern bool migrate_cve_2014_5263_xhci_fields; ++ + #endif +diff --git a/migration/migration.c b/migration/migration.c +index abaf6f9e3d..a87ff01b81 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -164,6 +164,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, + MIGRATION_CAPABILITY_X_COLO, + MIGRATION_CAPABILITY_VALIDATE_UUID); + ++bool migrate_pre_2_2; ++ + /* When we add fault tolerance, we could have several + migrations at once. For now we don't need to add + dynamic creation of migration */ +diff --git a/migration/migration.h b/migration/migration.h +index 8130b703eb..d016cedd9d 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -381,6 +381,11 @@ bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, + void migrate_add_address(SocketAddress *address); + + int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); ++/* ++ * Disables a load of subsections that were added in 2.2/rh7.2 for backwards ++ * migration compatibility. 
++ */ ++extern bool migrate_pre_2_2; + + #define qemu_ram_foreach_block \ + #warning "Use foreach_not_ignored_block in migration code" +-- +2.27.0 + diff --git a/0008-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch new file mode 100644 index 0000000..2e8c417 --- /dev/null +++ b/0008-Add-aarch64-machine-types.patch @@ -0,0 +1,405 @@ +From 670e90f5cbd92189155e079b8c6e2aafdf82d162 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 12:53:31 +0200 +Subject: Add aarch64 machine types + +Adding changes to add RHEL machine types for aarch64 architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (4.0.0): +- Use upstream compat handling + +Rebase notes (4.1.0-rc0): +- Removed a15memmap (upstream) +- Use virt_flash_create in rhel800_virt_instance_init + +Rebase notes (4.2.0-rc0): +- Set numa_mem_supported + +Rebase notes (4.2.0-rc3): +- aarch64: Add virt-rhel8.2.0 machine type for ARM (patch 92246) +- aarch64: virt: Allow more than 1TB of RAM (patch 92249) +- aarch64: virt: Allow PCDIMM instantiation (patch 92247) +- aarch64: virt: Enhance the comment related to gic-version (patch 92248) + +Rebase notes (5.0.0): +- Set default_ram_id in rhel_machine_class_init +- Added setting acpi properties + +Rebase notes (5.1.0): +- Added ras property +- Added to virt_machine_device_unplug_cb to machine type (upstream) +- added mte property (upstream) + +Rebase notes (weekly-210210): +- Added support for oem fields to machine type + +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + +Rebase notes (6.0.0-rc2): +- renamed oem-id and oem-table-id to x-oem-id and x-oem-table-id + +Rebase notes (210623): +- Protect TPM functions by CONFIG_TPM ifdef + +Rebase notes (6.1.0-rc0): +- Add support for default_bus_bypass_iommu + +Merged patches (4.0.0): +- 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM +- 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 +- 4d20863 aarch64: Use 256MB ECAM region by default + +Merged 
patches (4.1.0): +- c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM +- 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine + +Merged patches (5.2.0 rc0): +- 12990ad hw/arm: Changes to rhel820 machine +- 46d5a79 hw/arm: Introduce rhel_virt_instance_init() helper +- 098954a hw/arm: Add rhel830 machine type +- ee8e99d arm: Set correct max_cpus value on virt-rhel* machine types +- e5edd38 RHEL-only: arm/virt: Allow the TPM_TIS_SYSBUS device dynamic allocation in machvirt +- 6d7ba66 machine types/numa: set numa_mem_supported on old machine types (partialy) +- 25c5644 machine_types/numa: compatibility for auto_enable_numa_with_memdev (partialy) + +Merged patches (6.0): +- 078fadb5da AArch64 machine types cleanup +- ea7b7425fa hw/arm/virt: Add 8.4 Machine type + +Merged patches (weekly-210609): +- 73b1578882 hw/arm/virt: Add 8.5 machine type +- 5333038d11 hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 +- 63adb8ae86 arm/virt: Register highmem and gic-version as class properties + +Merged patches (weekly-211027): +- 86e3057c0a hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type +--- + hw/arm/virt.c | 226 +++++++++++++++++++++++++++++++++++++++++- + hw/core/machine.c | 2 + + include/hw/arm/virt.h | 8 ++ + 3 files changed, 235 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 5de4d9d73b..c77d26ab13 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -79,6 +79,7 @@ + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ + void *data) \ +@@ -105,7 +106,48 @@ + DEFINE_VIRT_MACHINE_LATEST(major, minor, true) + #define DEFINE_VIRT_MACHINE(major, minor) \ + DEFINE_VIRT_MACHINE_LATEST(major, minor, false) +- ++#endif /* disabled for RHEL */ ++ ++#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ ++ static void 
rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ ++ void *data) \ ++ { \ ++ MachineClass *mc = MACHINE_CLASS(oc); \ ++ rhel##m##n##s##_virt_options(mc); \ ++ mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \ ++ if (latest) { \ ++ mc->alias = "virt"; \ ++ mc->is_default = 1; \ ++ } \ ++ } \ ++ static const TypeInfo rhel##m##n##s##_machvirt_info = { \ ++ .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." # s), \ ++ .parent = TYPE_RHEL_MACHINE, \ ++ .class_init = rhel##m##n##s##_virt_class_init, \ ++ }; \ ++ static void rhel##m##n##s##_machvirt_init(void) \ ++ { \ ++ type_register_static(&rhel##m##n##s##_machvirt_info); \ ++ } \ ++ type_init(rhel##m##n##s##_machvirt_init); ++ ++#define DEFINE_RHEL_MACHINE_AS_LATEST(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true) ++#define DEFINE_RHEL_MACHINE(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) ++ ++/* This variable is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. 
++ */ ++GlobalProperty arm_rhel_compat[] = { ++ { ++ .driver = "virtio-net-pci", ++ .property = "romfile", ++ .value = "", ++ }, ++}; ++const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); + + /* Number of external interrupt lines to configure the GIC with */ + #define NUM_IRQS 256 +@@ -2180,6 +2222,7 @@ static void machvirt_init(MachineState *machine) + qemu_add_machine_init_done_notifier(&vms->machine_done); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_secure(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2207,6 +2250,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) + + vms->virt = value; + } ++#endif /* disabled for RHEL */ + + static bool virt_get_highmem(Object *obj, Error **errp) + { +@@ -2304,6 +2348,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, + visit_type_OnOffAuto(v, name, &vms->acpi, errp); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_ras(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2331,6 +2376,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) + + vms->mte = value; + } ++#endif /* disabled for RHEL */ + + static char *virt_get_gic_version(Object *obj, Error **errp) + { +@@ -2666,6 +2712,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + return fixed_ipa ? 
0 : requested_pa_size; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void virt_machine_class_init(ObjectClass *oc, void *data) + { + MachineClass *mc = MACHINE_CLASS(oc); +@@ -3031,3 +3078,180 @@ static void virt_machine_2_6_options(MachineClass *mc) + vmc->no_pmu = true; + } + DEFINE_VIRT_MACHINE(2, 6) ++#endif /* disabled for RHEL */ ++ ++static void rhel_machine_class_init(ObjectClass *oc, void *data) ++{ ++ MachineClass *mc = MACHINE_CLASS(oc); ++ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); ++ ++ mc->family = "virt-rhel-Z"; ++ mc->init = machvirt_init; ++ /* Maximum supported VCPU count for all virt-rhel* machines */ ++ mc->max_cpus = 384; ++#ifdef CONFIG_TPM ++ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); ++#endif ++ mc->block_default_type = IF_VIRTIO; ++ mc->no_cdrom = 1; ++ mc->pci_allow_0_address = true; ++ /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */ ++ mc->minimum_page_bits = 12; ++ mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; ++ mc->cpu_index_to_instance_props = virt_cpu_index_to_props; ++ mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a57"); ++ mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; ++ mc->kvm_type = virt_kvm_type; ++ assert(!mc->get_hotplug_handler); ++ mc->get_hotplug_handler = virt_machine_get_hotplug_handler; ++ hc->pre_plug = virt_machine_device_pre_plug_cb; ++ hc->plug = virt_machine_device_plug_cb; ++ hc->unplug_request = virt_machine_device_unplug_request_cb; ++ hc->unplug = virt_machine_device_unplug_cb; ++ mc->nvdimm_supported = true; ++ mc->auto_enable_numa_with_memhp = true; ++ mc->auto_enable_numa_with_memdev = true; ++ mc->default_ram_id = "mach-virt.ram"; ++ ++ object_class_property_add(oc, "acpi", "OnOffAuto", ++ virt_get_acpi, virt_set_acpi, ++ NULL, NULL); ++ object_class_property_set_description(oc, "acpi", ++ "Enable ACPI"); ++ ++ object_class_property_add_bool(oc, "highmem", virt_get_highmem, ++ virt_set_highmem); ++ 
object_class_property_set_description(oc, "highmem", ++ "Set on/off to enable/disable using " ++ "physical address space above 32 bits"); ++ ++ object_class_property_add_str(oc, "gic-version", virt_get_gic_version, ++ virt_set_gic_version); ++ object_class_property_set_description(oc, "gic-version", ++ "Set GIC version. " ++ "Valid values are 2, 3, host and max"); ++ ++ object_class_property_add_str(oc, "x-oem-id", ++ virt_get_oem_id, ++ virt_set_oem_id); ++ object_class_property_set_description(oc, "x-oem-id", ++ "Override the default value of field OEMID " ++ "in ACPI table header." ++ "The string may be up to 6 bytes in size"); ++ ++ object_class_property_add_str(oc, "x-oem-table-id", ++ virt_get_oem_table_id, ++ virt_set_oem_table_id); ++ object_class_property_set_description(oc, "x-oem-table-id", ++ "Override the default value of field OEM Table ID " ++ "in ACPI table header." ++ "The string may be up to 8 bytes in size"); ++ object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ virt_get_default_bus_bypass_iommu, ++ virt_set_default_bus_bypass_iommu); ++ ++} ++ ++static void rhel_virt_instance_init(Object *obj) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ ++ /* EL3 is disabled by default and non-configurable for RHEL */ ++ vms->secure = false; ++ ++ /* EL2 is disabled by default and non-configurable for RHEL */ ++ vms->virt = false; ++ ++ /* High memory is enabled by default */ ++ vms->highmem = true; ++ vms->gic_version = VIRT_GIC_VERSION_NOSEL; ++ ++ vms->highmem_ecam = !vmc->no_highmem_ecam; ++ ++ if (vmc->no_its) { ++ vms->its = false; ++ } else { ++ /* Default allows ITS instantiation */ ++ vms->its = true; ++ object_property_add_bool(obj, "its", virt_get_its, ++ virt_set_its); ++ object_property_set_description(obj, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation"); ++ } ++ ++ /* Default disallows iommu instantiation */ ++ vms->iommu = VIRT_IOMMU_NONE; ++ 
object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu); ++ object_property_set_description(obj, "iommu", ++ "Set the IOMMU type. " ++ "Valid values are none and smmuv3"); ++ ++ /* Default disallows RAS instantiation and is non-configurable for RHEL */ ++ vms->ras = false; ++ ++ /* MTE is disabled by default and non-configurable for RHEL */ ++ vms->mte = false; ++ ++ vms->default_bus_bypass_iommu = false; ++ vms->irqmap = a15irqmap; ++ ++ virt_flash_create(vms); ++ vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); ++ vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); ++ ++} ++ ++static const TypeInfo rhel_machine_info = { ++ .name = TYPE_RHEL_MACHINE, ++ .parent = TYPE_MACHINE, ++ .abstract = true, ++ .instance_size = sizeof(VirtMachineState), ++ .class_size = sizeof(VirtMachineClass), ++ .class_init = rhel_machine_class_init, ++ .instance_init = rhel_virt_instance_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_HOTPLUG_HANDLER }, ++ { } ++ }, ++}; ++ ++static void rhel_machine_init(void) ++{ ++ type_register_static(&rhel_machine_info); ++} ++type_init(rhel_machine_init); ++ ++static void rhel850_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) ++ ++static void rhel840_virt_options(MachineClass *mc) ++{ ++ rhel850_virt_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); ++} ++DEFINE_RHEL_MACHINE(8, 4, 0) ++ ++static void rhel830_virt_options(MachineClass *mc) ++{ ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ ++ rhel840_virt_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); ++ vmc->no_kvm_steal_time = true; ++} ++DEFINE_RHEL_MACHINE(8, 3, 0) ++ ++static void rhel820_virt_options(MachineClass *mc) ++{ ++ rhel830_virt_options(mc); ++ 
compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); ++ mc->numa_mem_supported = true; ++ mc->auto_enable_numa_with_memdev = false; ++} ++DEFINE_RHEL_MACHINE(8, 2, 0) +diff --git a/hw/core/machine.c b/hw/core/machine.c +index be4f9864cd..62febde5aa 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -87,6 +87,8 @@ GlobalProperty hw_compat_rhel_8_3[] = { + { "nvme", "use-intel-id", "on"}, + /* hw_compat_rhel_8_3 from hw_compat_5_1 */ + { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pl011", "migrate-clk", "off" }, + /* hw_compat_rhel_8_3 bz 1912846 */ + { "pci-xhci", "x-rh-late-msi-cap", "off" }, + /* hw_compat_rhel_8_3 from hw_compat_5_1 */ +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index dc6b66ffc8..9364628847 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -175,9 +175,17 @@ struct VirtMachineState { + + #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) + ++#if 0 /* disabled for Red Hat Enterprise Linux */ + #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") + OBJECT_DECLARE_TYPE(VirtMachineState, VirtMachineClass, VIRT_MACHINE) + ++#else ++#define TYPE_RHEL_MACHINE MACHINE_TYPE_NAME("virt-rhel") ++typedef struct VirtMachineClass VirtMachineClass; ++typedef struct VirtMachineState VirtMachineState; ++DECLARE_OBJ_CHECKERS(VirtMachineState, VirtMachineClass, VIRT_MACHINE, TYPE_RHEL_MACHINE) ++#endif ++ + void virt_acpi_setup(VirtMachineState *vms); + bool virt_is_acpi_enabled(VirtMachineState *vms); + +-- +2.27.0 + diff --git a/0008-Add-ppc64-machine-types.patch b/0008-Add-ppc64-machine-types.patch new file mode 100644 index 0000000..860e803 --- /dev/null +++ b/0008-Add-ppc64-machine-types.patch @@ -0,0 +1,528 @@ +From f61b3d7dc000886e23943457ee9baf1d4cae43b4 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:27:13 +0200 +Subject: Add ppc64 machine types + +Adding changes to add RHEL 
machine types for ppc64 architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (6.2.0): +- Fixed rebase conflict relicts +- Update machine type compat for 6.2 (from MR 66) + +Merged patches (6.1.0): +- c438c25ac3 redhat: Define pseries-rhel8.5.0 machine type +- a3995e2eff Remove RHEL 7.0.0 machine type (only ppc64 changes) +- ad3190a79b Remove RHEL 7.1.0 machine type (only ppc64 changes) +- 84bbe15d4e Remove RHEL 7.2.0 machine type (only ppc64 changes) +- 0215eb3356 Remove RHEL 7.3.0 machine types (only ppc64 changes) +- af69d1ca6e Remove RHEL 7.4.0 machine types (only ppc64 changes) +- 8f7a74ab78 Remove RHEL 7.5.0 machine types (only ppc64 changes) +--- + hw/ppc/spapr.c | 243 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr_cpu_core.c | 13 +++ + include/hw/ppc/spapr.h | 4 + + target/ppc/compat.c | 13 ++- + target/ppc/cpu.h | 1 + + target/ppc/kvm.c | 27 +++++ + target/ppc/kvm_ppc.h | 13 +++ + 7 files changed, 313 insertions(+), 1 deletion(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index a4372ba189..5fdf8b506d 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -1622,6 +1622,9 @@ static void spapr_machine_reset(MachineState *machine) + + pef_kvm_reset(machine->cgs, &error_fatal); + spapr_caps_apply(spapr); ++ if (spapr->svm_allowed) { ++ kvmppc_svm_allow(&error_fatal); ++ } + + first_ppc_cpu = POWERPC_CPU(first_cpu); + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && +@@ -3317,6 +3320,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) + spapr->host_serial = g_strdup(value); + } + ++static bool spapr_get_svm_allowed(Object *obj, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ return spapr->svm_allowed; ++} ++ ++static void spapr_set_svm_allowed(Object *obj, bool value, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ spapr->svm_allowed = value; ++} ++ + static void spapr_instance_init(Object *obj) + { + SpaprMachineState *spapr = SPAPR_MACHINE(obj); 
+@@ -3395,6 +3412,12 @@ static void spapr_instance_init(Object *obj) + spapr_get_host_serial, spapr_set_host_serial); + object_property_set_description(obj, "host-serial", + "Host serial number to advertise in guest device tree"); ++ object_property_add_bool(obj, "x-svm-allowed", ++ spapr_get_svm_allowed, ++ spapr_set_svm_allowed); ++ object_property_set_description(obj, "x-svm-allowed", ++ "Allow the guest to become a Secure Guest" ++ " (experimental only)"); + } + + static void spapr_machine_finalizefn(Object *obj) +@@ -4652,6 +4675,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + vmc->client_architecture_support = spapr_vof_client_architecture_support; + vmc->quiesce = spapr_vof_quiesce; + vmc->setprop = spapr_vof_setprop; ++ smc->has_power9_support = true; + } + + static const TypeInfo spapr_machine_info = { +@@ -4703,6 +4727,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) + } \ + type_init(spapr_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* + * pseries-7.0 + */ +@@ -4830,6 +4855,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) + } + + DEFINE_SPAPR_MACHINE(4_1, "4.1", false); ++#endif + + /* + * pseries-4.0 +@@ -4849,6 +4875,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, + *nv2atsd = 0; + return true; + } ++ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_4_0_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -5176,6 +5204,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); + } + DEFINE_SPAPR_MACHINE(2_1, "2.1", false); ++#endif ++ ++static void spapr_machine_rhel_default_class_options(MachineClass *mc) ++{ ++ /* ++ * Defaults for the latest behaviour inherited from the base class ++ * can be overriden here for all pseries-rhel* machines. 
++ */ ++ ++ /* Maximum supported VCPU count */ ++ mc->max_cpus = 384; ++} ++ ++/* ++ * pseries-rhel8.5.0 ++ * like pseries-6.0 ++ */ ++ ++static void spapr_machine_rhel850_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ smc->pre_6_2_numa_affinity = true; ++ mc->smp_props.prefer_sockets = true; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); ++ ++/* ++ * pseries-rhel8.4.0 ++ * like pseries-5.2 ++ */ ++ ++static void spapr_machine_rhel840_class_options(MachineClass *mc) ++{ ++ spapr_machine_rhel850_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", false); ++ ++/* ++ * pseries-rhel8.3.0 ++ * like pseries-5.1 ++ */ ++ ++static void spapr_machine_rhel830_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel840_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ ++ /* from pseries-5.1 */ ++ smc->pre_5_2_numa_associativity = true; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", false); ++ ++/* ++ * pseries-rhel8.2.0 ++ * like pseries-4.2 + pseries-5.0 ++ * except SPAPR_CAP_CCF_ASSIST that has been backported to pseries-rhel8.1.0 ++ */ ++ ++static void spapr_machine_rhel820_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ /* from pseries-5.0 */ ++ static GlobalProperty compat[] = { ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" }, ++ }; ++ ++ spapr_machine_rhel830_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(mc->compat_props, compat, 
G_N_ELEMENTS(compat)); ++ ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; ++ smc->rma_limit = 16 * GiB; ++ mc->nvdimm_supported = false; ++ ++ /* from pseries-5.0 */ ++ mc->numa_mem_supported = true; ++ smc->pre_5_1_assoc_refpoints = true; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", false); ++ ++/* ++ * pseries-rhel8.1.0 ++ * like pseries-4.1 ++ */ ++ ++static void spapr_machine_rhel810_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ static GlobalProperty compat[] = { ++ /* Only allow 4kiB and 64kiB IOMMU pagesizes */ ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" }, ++ }; ++ ++ spapr_machine_rhel820_class_options(mc); ++ ++ /* from pseries-4.1 */ ++ smc->linux_pci_probe = false; ++ smc->smp_threads_vsmt = false; ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, ++ hw_compat_rhel_8_1_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); ++ ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); ++ ++/* ++ * pseries-rhel8.0.0 ++ * like pseries-3.1 and pseries-4.0 ++ * except SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and SPAPR_CAP_IBS ++ * that have been backported to pseries-rhel8.0.0 ++ */ ++ ++static void spapr_machine_rhel800_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel810_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, ++ hw_compat_rhel_8_0_len); ++ ++ /* pseries-4.0 */ ++ smc->phb_placement = phb_placement_4_0; ++ smc->irq = &spapr_irq_xics; ++ smc->pre_4_1_migration = true; ++ ++ /* pseries-3.1 */ ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); ++ smc->update_dt_enabled = false; ++ smc->dr_phb_enabled = false; ++ smc->broken_host_serial_model = true; ++ smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; ++} ++ 
++DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", false); ++ ++/* ++ * pseries-rhel7.6.0 ++ * like spapr_compat_2_12 and spapr_compat_3_0 ++ * spapr_compat_0 is empty ++ */ ++GlobalProperty spapr_compat_rhel7_6[] = { ++ { TYPE_POWERPC_CPU, "pre-3.0-migration", "on" }, ++ { TYPE_SPAPR_CPU_CORE, "pre-3.0-migration", "on" }, ++}; ++const size_t spapr_compat_rhel7_6_len = G_N_ELEMENTS(spapr_compat_rhel7_6); ++ ++ ++static void spapr_machine_rhel760_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel800_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_6, spapr_compat_rhel7_6_len); ++ ++ /* from spapr_machine_3_0_class_options() */ ++ smc->legacy_irq_allocation = true; ++ smc->nr_xirqs = 0x400; ++ smc->irq = &spapr_irq_xics_legacy; ++ ++ /* from spapr_machine_2_12_class_options() */ ++ /* We depend on kvm_enabled() to choose a default value for the ++ * hpt-max-page-size capability. Of course we can't do it here ++ * because this is too early and the HW accelerator isn't initialzed ++ * yet. Postpone this to machine init (see default_caps_with_cpu()). 
++ */ ++ smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0; ++ ++ /* SPAPR_CAP_WORKAROUND enabled in pseries-rhel800 by ++ * f21757edc554 ++ * "Enable mitigations by default for pseries-4.0 machine type") ++ */ ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", false); ++ ++/* ++ * pseries-rhel7.6.0-sxxm ++ * ++ * pseries-rhel7.6.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel760_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); + + static void spapr_machine_register_types(void) + { +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index fcb5dfe792..ab8fb5bf62 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -25,6 +25,7 @@ + #include "sysemu/reset.h" + #include "sysemu/hw_accel.h" + #include "qemu/error-report.h" ++#include "cpu-models.h" + + static void spapr_reset_vcpu(PowerPCCPU *cpu) + { +@@ -259,6 +260,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + { + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); ++ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + if (!qdev_realize(DEVICE(cpu), NULL, errp)) { + return false; +@@ -270,6 +272,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + /* Set time-base frequency to 512 MHz. vhyp must be set first. 
*/ + cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); + ++ if (!smc->has_power9_support && ++ (((spapr->max_compat_pvr && ++ ppc_compat_cmp(spapr->max_compat_pvr, ++ CPU_POWERPC_LOGICAL_3_00) >= 0)) || ++ (!spapr->max_compat_pvr && ++ ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, 0)))) { ++ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, ++ "POWER9 CPU is not supported by this machine class"); ++ return false; ++ } ++ + if (spapr_irq_cpu_intc_create(spapr, cpu, errp) < 0) { + qdev_unrealize(DEVICE(cpu)); + return false; +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index f5c33dcc86..4a68e0a901 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -154,6 +154,7 @@ struct SpaprMachineClass { + bool pre_5_2_numa_associativity; + bool pre_6_2_numa_affinity; + ++ bool has_power9_support; + bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, +@@ -241,6 +242,9 @@ struct SpaprMachineState { + /* Set by -boot */ + char *boot_device; + ++ /* Secure Guest support via x-svm-allowed */ ++ bool svm_allowed; ++ + /*< public >*/ + char *kvm_type; + char *host_model; +diff --git a/target/ppc/compat.c b/target/ppc/compat.c +index 7949a24f5a..f207a9ba01 100644 +--- a/target/ppc/compat.c ++++ b/target/ppc/compat.c +@@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) + return NULL; + } + ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2) ++{ ++ const CompatInfo *compat1 = compat_by_pvr(pvr1); ++ const CompatInfo *compat2 = compat_by_pvr(pvr2); ++ ++ g_assert(compat1); ++ g_assert(compat2); ++ ++ return compat1 - compat2; ++} ++ + static bool pcc_compat(PowerPCCPUClass *pcc, uint32_t compat_pvr, +- uint32_t min_compat_pvr, uint32_t max_compat_pvr) ++ uint32_t min_compat_pvr, uint32_t max_compat_pvr) + { + const CompatInfo *compat = compat_by_pvr(compat_pvr); + const CompatInfo *min = compat_by_pvr(min_compat_pvr); +diff --git a/target/ppc/cpu.h 
b/target/ppc/cpu.h +index 047b24ba50..79c5ac50b9 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1462,6 +1462,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) + + /* Compatibility modes */ + #if defined(TARGET_PPC64) ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2); + bool ppc_check_compat(PowerPCCPU *cpu, uint32_t compat_pvr, + uint32_t min_compat_pvr, uint32_t max_compat_pvr); + bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, +diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c +index dc93b99189..154888cce5 100644 +--- a/target/ppc/kvm.c ++++ b/target/ppc/kvm.c +@@ -90,6 +90,7 @@ static int cap_ppc_nested_kvm_hv; + static int cap_large_decr; + static int cap_fwnmi; + static int cap_rpt_invalidate; ++static int cap_ppc_secure_guest; + + static uint32_t debug_inst_opcode; + +@@ -137,6 +138,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); + kvmppc_get_cpu_characteristics(s); + cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); ++ cap_ppc_secure_guest = kvm_vm_check_extension(s, KVM_CAP_PPC_SECURE_GUEST); + cap_large_decr = kvmppc_get_dec_bits(); + cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); + /* +@@ -2563,6 +2565,16 @@ int kvmppc_has_cap_rpt_invalidate(void) + return cap_rpt_invalidate; + } + ++bool kvmppc_has_cap_secure_guest(void) ++{ ++ return !!cap_ppc_secure_guest; ++} ++ ++int kvmppc_enable_cap_secure_guest(void) ++{ ++ return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); ++} ++ + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) + { + uint32_t host_pvr = mfpvr(); +@@ -2959,3 +2971,18 @@ bool kvm_arch_cpu_check_are_resettable(void) + { + return true; + } ++ ++void kvmppc_svm_allow(Error **errp) ++{ ++ if (!kvm_enabled()) { ++ error_setg(errp, "No PEF support in tcg, try x-svm-allowed=off"); ++ return; ++ } ++ ++ if (!kvmppc_has_cap_secure_guest()) { ++ error_setg(errp, "KVM 
implementation does not support secure guests, " ++ "try x-svm-allowed=off"); ++ } else if (kvmppc_enable_cap_secure_guest() < 0) { ++ error_setg(errp, "Error enabling x-svm-allowed, try x-svm-allowed=off"); ++ } ++} +diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h +index ee9325bf9a..20dbb95989 100644 +--- a/target/ppc/kvm_ppc.h ++++ b/target/ppc/kvm_ppc.h +@@ -40,6 +40,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); + target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, + bool radix, bool gtse, + uint64_t proc_tbl); ++void kvmppc_svm_allow(Error **errp); + #ifndef CONFIG_USER_ONLY + bool kvmppc_spapr_use_multitce(void); + int kvmppc_spapr_enable_inkernel_multitce(void); +@@ -74,6 +75,8 @@ int kvmppc_get_cap_large_decr(void); + int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); + int kvmppc_has_cap_rpt_invalidate(void); + int kvmppc_enable_hwrng(void); ++bool kvmppc_has_cap_secure_guest(void); ++int kvmppc_enable_cap_secure_guest(void); + int kvmppc_put_books_sregs(PowerPCCPU *cpu); + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); + void kvmppc_check_papr_resize_hpt(Error **errp); +@@ -393,6 +396,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) + return false; + } + ++static inline bool kvmppc_has_cap_secure_guest(void) ++{ ++ return false; ++} ++ ++static inline int kvmppc_enable_cap_secure_guest(void) ++{ ++ return -1; ++} ++ + static inline int kvmppc_enable_hwrng(void) + { + return -1; +-- +2.31.1 + diff --git a/0009-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch new file mode 100644 index 0000000..f5ce09a --- /dev/null +++ b/0009-Add-ppc64-machine-types.patch @@ -0,0 +1,714 @@ +From 3c65320ce5b8ad3bb8c0d8fd13a88c464d5c5845 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:27:13 +0200 +Subject: Add ppc64 machine types + +Adding changes to add RHEL machine types for ppc64 architecture. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- remove instance options and use upstream solution +- Use upstream compat handling +- Replace SPAPR_PCI_2_7_MMIO_WIN_SIZE with value (changed upstream) +- re-add handling of instance_options (removed upstream) +- Use p8 as default for rhel machine types (p9 default upstream) +- sPAPRMachineClass renamed to SpaprMachineClass (upstream) + +Rebase changes (4.1.0): +- Update format for compat structures + +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + +Merged patches (4.0.0): +- 467d59a redhat: define pseries-rhel8.0.0 machine type + +Merged patches (4.1.0): +- f21757edc target/ppc/spapr: Enable mitigations by default for pseries-4.0 machine type +- 2511c63 redhat: sync pseries-rhel7.6.0 with rhel-av-8.0.1 +- 89f01da redhat: define pseries-rhel8.1.0 machine type + +Merged patches (4.2.0): +- bcba728 redhat: update pseries-rhel8.1.0 machine type +- redhat: update pseries-rhel-7.6.0 machine type (patch 93039) +- redhat: define pseries-rhel8.2.0 machine type (patch 93041) + +Merged patches (5.1.0): +- eb121ff spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine (partial) + +Merged patches (5.2.0 rc0): +- 311a20f redhat: define pseries-rhel8.3.0 machine type +- 1284167 ppc: Set correct max_cpus value on spapr-rhel* machine types +- 1ab8783 redhat: update pseries-rhel8.2.0 machine type +- b162af531a target/ppc: Add experimental option for enabling secure guests + +Merged patches (weekly-201216): +- 943c936df3 redhat: Add spapr_machine_rhel_default_class_options() +- 030b5e6fba redhat: Define pseries-rhel8.4.0 machine type + +Merged patches (weekly-210602): +- b7128d8ef7 redhat: Define pseries-rhel8.5.0 machine type + +Merged patches (weekly-211006): +- c8f68b47e9 redhat: Update pseries-rhel8.5.0 +--- + hw/ppc/spapr.c | 382 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr_cpu_core.c | 13 ++ + include/hw/ppc/spapr.h | 4 + + target/ppc/compat.c | 13 +- + target/ppc/cpu.h | 
1 + + target/ppc/kvm.c | 27 +++ + target/ppc/kvm_ppc.h | 13 ++ + 7 files changed, 452 insertions(+), 1 deletion(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 3b5fd749be..cace86028d 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -1593,6 +1593,9 @@ static void spapr_machine_reset(MachineState *machine) + + pef_kvm_reset(machine->cgs, &error_fatal); + spapr_caps_apply(spapr); ++ if (spapr->svm_allowed) { ++ kvmppc_svm_allow(&error_fatal); ++ } + + first_ppc_cpu = POWERPC_CPU(first_cpu); + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && +@@ -3288,6 +3291,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) + spapr->host_serial = g_strdup(value); + } + ++static bool spapr_get_svm_allowed(Object *obj, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ return spapr->svm_allowed; ++} ++ ++static void spapr_set_svm_allowed(Object *obj, bool value, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ spapr->svm_allowed = value; ++} ++ + static void spapr_instance_init(Object *obj) + { + SpaprMachineState *spapr = SPAPR_MACHINE(obj); +@@ -3366,6 +3383,12 @@ static void spapr_instance_init(Object *obj) + spapr_get_host_serial, spapr_set_host_serial); + object_property_set_description(obj, "host-serial", + "Host serial number to advertise in guest device tree"); ++ object_property_add_bool(obj, "x-svm-allowed", ++ spapr_get_svm_allowed, ++ spapr_set_svm_allowed); ++ object_property_set_description(obj, "x-svm-allowed", ++ "Allow the guest to become a Secure Guest" ++ " (experimental only)"); + } + + static void spapr_machine_finalizefn(Object *obj) +@@ -4614,6 +4637,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + vmc->client_architecture_support = spapr_vof_client_architecture_support; + vmc->quiesce = spapr_vof_quiesce; + vmc->setprop = spapr_vof_setprop; ++ smc->has_power9_support = true; + } + + static const TypeInfo spapr_machine_info = { +@@ -4665,6 
+4689,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) + } \ + type_init(spapr_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* + * pseries-6.2 + */ +@@ -4781,6 +4806,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) + } + + DEFINE_SPAPR_MACHINE(4_1, "4.1", false); ++#endif + + /* + * pseries-4.0 +@@ -4800,6 +4826,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, + *nv2atsd = 0; + return true; + } ++ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_4_0_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -4958,6 +4986,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); + /* + * pseries-2.7 + */ ++#endif + + static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, +@@ -5013,6 +5042,7 @@ static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + return true; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_2_7_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -5127,6 +5157,358 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); + } + DEFINE_SPAPR_MACHINE(2_1, "2.1", false); ++#endif ++ ++static void spapr_machine_rhel_default_class_options(MachineClass *mc) ++{ ++ /* ++ * Defaults for the latest behaviour inherited from the base class ++ * can be overriden here for all pseries-rhel* machines. 
++ */ ++ ++ /* Maximum supported VCPU count */ ++ mc->max_cpus = 384; ++} ++ ++/* ++ * pseries-rhel8.5.0 ++ * like pseries-6.0 ++ */ ++ ++static void spapr_machine_rhel850_class_options(MachineClass *mc) ++{ ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); ++ ++/* ++ * pseries-rhel8.4.0 ++ * like pseries-5.2 ++ */ ++ ++static void spapr_machine_rhel840_class_options(MachineClass *mc) ++{ ++ spapr_machine_rhel850_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", false); ++ ++/* ++ * pseries-rhel8.3.0 ++ * like pseries-5.1 ++ */ ++ ++static void spapr_machine_rhel830_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel840_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ ++ /* from pseries-5.1 */ ++ smc->pre_5_2_numa_associativity = true; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", false); ++ ++/* ++ * pseries-rhel8.2.0 ++ * like pseries-4.2 + pseries-5.0 ++ * except SPAPR_CAP_CCF_ASSIST that has been backported to pseries-rhel8.1.0 ++ */ ++ ++static void spapr_machine_rhel820_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ /* from pseries-5.0 */ ++ static GlobalProperty compat[] = { ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" }, ++ }; ++ ++ spapr_machine_rhel830_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); ++ ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; ++ smc->rma_limit = 16 * GiB; ++ 
mc->nvdimm_supported = false; ++ ++ /* from pseries-5.0 */ ++ mc->numa_mem_supported = true; ++ smc->pre_5_1_assoc_refpoints = true; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", false); ++ ++/* ++ * pseries-rhel8.1.0 ++ * like pseries-4.1 ++ */ ++ ++static void spapr_machine_rhel810_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ static GlobalProperty compat[] = { ++ /* Only allow 4kiB and 64kiB IOMMU pagesizes */ ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" }, ++ }; ++ ++ spapr_machine_rhel820_class_options(mc); ++ ++ /* from pseries-4.1 */ ++ smc->linux_pci_probe = false; ++ smc->smp_threads_vsmt = false; ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, ++ hw_compat_rhel_8_1_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); ++ ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); ++ ++/* ++ * pseries-rhel8.0.0 ++ * like pseries-3.1 and pseries-4.0 ++ * except SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and SPAPR_CAP_IBS ++ * that have been backported to pseries-rhel8.0.0 ++ */ ++ ++static void spapr_machine_rhel800_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel810_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, ++ hw_compat_rhel_8_0_len); ++ ++ /* pseries-4.0 */ ++ smc->phb_placement = phb_placement_4_0; ++ smc->irq = &spapr_irq_xics; ++ smc->pre_4_1_migration = true; ++ ++ /* pseries-3.1 */ ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); ++ smc->update_dt_enabled = false; ++ smc->dr_phb_enabled = false; ++ smc->broken_host_serial_model = true; ++ smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", false); ++ ++/* ++ * pseries-rhel7.6.0 ++ * like spapr_compat_2_12 and spapr_compat_3_0 ++ * spapr_compat_0 is empty 
++ */ ++GlobalProperty spapr_compat_rhel7_6[] = { ++ { TYPE_POWERPC_CPU, "pre-3.0-migration", "on" }, ++ { TYPE_SPAPR_CPU_CORE, "pre-3.0-migration", "on" }, ++}; ++const size_t spapr_compat_rhel7_6_len = G_N_ELEMENTS(spapr_compat_rhel7_6); ++ ++ ++static void spapr_machine_rhel760_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel800_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_6, spapr_compat_rhel7_6_len); ++ ++ /* from spapr_machine_3_0_class_options() */ ++ smc->legacy_irq_allocation = true; ++ smc->nr_xirqs = 0x400; ++ smc->irq = &spapr_irq_xics_legacy; ++ ++ /* from spapr_machine_2_12_class_options() */ ++ /* We depend on kvm_enabled() to choose a default value for the ++ * hpt-max-page-size capability. Of course we can't do it here ++ * because this is too early and the HW accelerator isn't initialzed ++ * yet. Postpone this to machine init (see default_caps_with_cpu()). 
++ */ ++ smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0; ++ ++ /* SPAPR_CAP_WORKAROUND enabled in pseries-rhel800 by ++ * f21757edc554 ++ * "Enable mitigations by default for pseries-4.0 machine type") ++ */ ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", false); ++ ++/* ++ * pseries-rhel7.6.0-sxxm ++ * ++ * pseries-rhel7.6.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel760_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); ++ ++static void spapr_machine_rhel750_class_options(MachineClass *mc) ++{ ++ spapr_machine_rhel760_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel750, "rhel7.5.0", false); ++ ++/* ++ * pseries-rhel7.5.0-sxxm ++ * ++ * pseries-rhel7.5.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel750sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel750_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel750sxxm, "rhel7.5.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.4.0 ++ * like spapr_compat_2_9 ++ */ ++GlobalProperty spapr_compat_rhel7_4[] = { ++ { 
TYPE_POWERPC_CPU, "pre-2.10-migration", "on" }, ++}; ++const size_t spapr_compat_rhel7_4_len = G_N_ELEMENTS(spapr_compat_rhel7_4); ++ ++static void spapr_machine_rhel740_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel750_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_4, spapr_compat_rhel7_4_len); ++ smc->has_power9_support = false; ++ smc->pre_2_10_has_unused_icps = true; ++ smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; ++ smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel740, "rhel7.4.0", false); ++ ++/* ++ * pseries-rhel7.4.0-sxxm ++ * ++ * pseries-rhel7.4.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel740sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel740_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel740sxxm, "rhel7.4.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.3.0 ++ * like spapr_compat_2_6/_2_7/_2_8 but "ddw" has been backported to RHEL7_3 ++ */ ++GlobalProperty spapr_compat_rhel7_3[] = { ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0xf80000000" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem64_win_size", "0" }, ++ { TYPE_POWERPC_CPU, "pre-2.8-migration", "on" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-2.8-migration", "on" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pcie-extended-configuration-space", "off" }, ++}; ++const size_t spapr_compat_rhel7_3_len = G_N_ELEMENTS(spapr_compat_rhel7_3); ++ ++static void spapr_machine_rhel730_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ 
spapr_machine_rhel740_class_options(mc); ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3"); ++ mc->default_machine_opts = "modern-hotplug-events=off"; ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_3, spapr_compat_rhel7_3_len); ++ ++ smc->phb_placement = phb_placement_2_7; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel730, "rhel7.3.0", false); ++ ++/* ++ * pseries-rhel7.3.0-sxxm ++ * ++ * pseries-rhel7.3.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel730sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel730_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel730sxxm, "rhel7.3.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.2.0 ++ */ ++/* Should be like spapr_compat_2_5 + 2_4 + 2_3, but "dynamic-reconfiguration" ++ * has been backported to RHEL7_2 so we don't need it here. 
++ */ ++ ++GlobalProperty spapr_compat_rhel7_2[] = { ++ { "spapr-vlan", "use-rx-buffer-pools", "off" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "ddw", "off" }, ++}; ++const size_t spapr_compat_rhel7_2_len = G_N_ELEMENTS(spapr_compat_rhel7_2); ++ ++static void spapr_machine_rhel720_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel730_class_options(mc); ++ smc->use_ohci_by_default = true; ++ mc->has_hotpluggable_cpus = NULL; ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_2, spapr_compat_rhel7_2_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel720, "rhel7.2.0", false); + + static void spapr_machine_register_types(void) + { +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index 8ba34f6a1d..78eca1c04a 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -24,6 +24,7 @@ + #include "sysemu/reset.h" + #include "sysemu/hw_accel.h" + #include "qemu/error-report.h" ++#include "cpu-models.h" + + static void spapr_reset_vcpu(PowerPCCPU *cpu) + { +@@ -250,6 +251,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + { + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); ++ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + if (!qdev_realize(DEVICE(cpu), NULL, errp)) { + return false; +@@ -261,6 +263,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); + kvmppc_set_papr(cpu); + ++ if (!smc->has_power9_support && ++ (((spapr->max_compat_pvr && ++ ppc_compat_cmp(spapr->max_compat_pvr, ++ CPU_POWERPC_LOGICAL_3_00) >= 0)) || ++ (!spapr->max_compat_pvr && ++ ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, 0)))) { ++ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, ++ "POWER9 CPU is not supported by this machine class"); ++ return false; ++ } ++ + if (spapr_irq_cpu_intc_create(spapr, cpu, 
errp) < 0) { + qdev_unrealize(DEVICE(cpu)); + return false; +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index ee7504b976..37a014d59c 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -154,6 +154,7 @@ struct SpaprMachineClass { + bool pre_5_2_numa_associativity; + bool pre_6_2_numa_affinity; + ++ bool has_power9_support; + bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, +@@ -237,6 +238,9 @@ struct SpaprMachineState { + + /* Set by -boot */ + char *boot_device; ++ ++ /* Secure Guest support via x-svm-allowed */ ++ bool svm_allowed; + + /*< public >*/ + char *kvm_type; +diff --git a/target/ppc/compat.c b/target/ppc/compat.c +index 7949a24f5a..f207a9ba01 100644 +--- a/target/ppc/compat.c ++++ b/target/ppc/compat.c +@@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) + return NULL; + } + ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2) ++{ ++ const CompatInfo *compat1 = compat_by_pvr(pvr1); ++ const CompatInfo *compat2 = compat_by_pvr(pvr2); ++ ++ g_assert(compat1); ++ g_assert(compat2); ++ ++ return compat1 - compat2; ++} ++ + static bool pcc_compat(PowerPCCPUClass *pcc, uint32_t compat_pvr, +- uint32_t min_compat_pvr, uint32_t max_compat_pvr) ++ uint32_t min_compat_pvr, uint32_t max_compat_pvr) + { + const CompatInfo *compat = compat_by_pvr(compat_pvr); + const CompatInfo *min = compat_by_pvr(min_compat_pvr); +diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h +index e946da5f3a..23e8b76c85 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1401,6 +1401,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) + + /* Compatibility modes */ + #if defined(TARGET_PPC64) ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2); + bool ppc_check_compat(PowerPCCPU *cpu, uint32_t compat_pvr, + uint32_t min_compat_pvr, uint32_t max_compat_pvr); + bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, +diff 
--git a/target/ppc/kvm.c b/target/ppc/kvm.c +index dc93b99189..154888cce5 100644 +--- a/target/ppc/kvm.c ++++ b/target/ppc/kvm.c +@@ -90,6 +90,7 @@ static int cap_ppc_nested_kvm_hv; + static int cap_large_decr; + static int cap_fwnmi; + static int cap_rpt_invalidate; ++static int cap_ppc_secure_guest; + + static uint32_t debug_inst_opcode; + +@@ -137,6 +138,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); + kvmppc_get_cpu_characteristics(s); + cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); ++ cap_ppc_secure_guest = kvm_vm_check_extension(s, KVM_CAP_PPC_SECURE_GUEST); + cap_large_decr = kvmppc_get_dec_bits(); + cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); + /* +@@ -2563,6 +2565,16 @@ int kvmppc_has_cap_rpt_invalidate(void) + return cap_rpt_invalidate; + } + ++bool kvmppc_has_cap_secure_guest(void) ++{ ++ return !!cap_ppc_secure_guest; ++} ++ ++int kvmppc_enable_cap_secure_guest(void) ++{ ++ return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); ++} ++ + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) + { + uint32_t host_pvr = mfpvr(); +@@ -2959,3 +2971,18 @@ bool kvm_arch_cpu_check_are_resettable(void) + { + return true; + } ++ ++void kvmppc_svm_allow(Error **errp) ++{ ++ if (!kvm_enabled()) { ++ error_setg(errp, "No PEF support in tcg, try x-svm-allowed=off"); ++ return; ++ } ++ ++ if (!kvmppc_has_cap_secure_guest()) { ++ error_setg(errp, "KVM implementation does not support secure guests, " ++ "try x-svm-allowed=off"); ++ } else if (kvmppc_enable_cap_secure_guest() < 0) { ++ error_setg(errp, "Error enabling x-svm-allowed, try x-svm-allowed=off"); ++ } ++} +diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h +index ee9325bf9a..20dbb95989 100644 +--- a/target/ppc/kvm_ppc.h ++++ b/target/ppc/kvm_ppc.h +@@ -40,6 +40,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); + target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, + bool 
radix, bool gtse, + uint64_t proc_tbl); ++void kvmppc_svm_allow(Error **errp); + #ifndef CONFIG_USER_ONLY + bool kvmppc_spapr_use_multitce(void); + int kvmppc_spapr_enable_inkernel_multitce(void); +@@ -74,6 +75,8 @@ int kvmppc_get_cap_large_decr(void); + int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); + int kvmppc_has_cap_rpt_invalidate(void); + int kvmppc_enable_hwrng(void); ++bool kvmppc_has_cap_secure_guest(void); ++int kvmppc_enable_cap_secure_guest(void); + int kvmppc_put_books_sregs(PowerPCCPU *cpu); + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); + void kvmppc_check_papr_resize_hpt(Error **errp); +@@ -393,6 +396,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) + return false; + } + ++static inline bool kvmppc_has_cap_secure_guest(void) ++{ ++ return false; ++} ++ ++static inline int kvmppc_enable_cap_secure_guest(void) ++{ ++ return -1; ++} ++ + static inline int kvmppc_enable_hwrng(void) + { + return -1; +-- +2.27.0 + diff --git a/0009-Add-s390x-machine-types.patch b/0009-Add-s390x-machine-types.patch new file mode 100644 index 0000000..2d8b554 --- /dev/null +++ b/0009-Add-s390x-machine-types.patch @@ -0,0 +1,186 @@ +From 680f343e58a50a99d17bc7dedd3ee90980912023 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:47:32 +0200 +Subject: Add s390x machine types + +Adding changes to add RHEL machine types for s390x architecture. 
+ +Signed-off-by: Miroslav Rezanina +-- +Merged patches (6.1.0): +- 64a9a5c971 hw/s390x: Remove the RHEL7-only machine type +- 395516d62b redhat: s390x: add rhel-8.5.0 compat machine + +Merged patches (6.2.0): +- 3bf66f4520 redhat: Add s390x machine type compatibility update for 6.1 rebase + +Merged patches (7.0.0): +- e6ff4de4f7 redhat: Add s390x machine type compatibility handling for the rebase to v6.2 +- 4b0efa7e21 redhat: Add rhel8.6.0 and rhel9.0.0 machine types for s390x +- dcc64971bf RHEL: mark old machine types as deprecated (partialy) +--- + hw/core/machine.c | 6 +++ + hw/s390x/s390-virtio-ccw.c | 104 ++++++++++++++++++++++++++++++++++++- + include/hw/boards.h | 2 + + 3 files changed, 111 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index ea430d844e..77202a3570 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,12 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" + ++/* ++ * RHEL only: machine types for previous major releases are deprecated ++ */ ++const char *rhel_old_machine_deprecation = ++ "machine types for previous major releases are deprecated"; ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 90480e7cf9..ec4176a1e0 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -767,7 +767,7 @@ bool css_migration_enabled(void) + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + ccw_machine_##suffix##_class_options(mc); \ +- mc->desc = "VirtIO-ccw based S390 machine v" verstr; \ ++ mc->desc = "VirtIO-ccw based S390 machine " verstr; \ + if (latest) { \ + mc->alias = "s390-ccw-virtio"; \ + mc->is_default = true; \ +@@ -791,6 +791,7 @@ bool css_migration_enabled(void) + } \ + type_init(ccw_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void ccw_machine_7_0_instance_options(MachineState *machine) + { + } +@@ -1115,6 
+1116,107 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + } + DEFINE_CCW_MACHINE(2_4, "2.4", false); ++#endif ++ ++static void ccw_machine_rhel900_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel900_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); ++ ++static void ccw_machine_rhel860_instance_options(MachineState *machine) ++{ ++ /* Note: The -rhel8.6.0 and -rhel9.0.0 machines are technically identical */ ++ ccw_machine_rhel900_instance_options(machine); ++} ++ ++static void ccw_machine_rhel860_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel900_class_options(mc); ++ ++ /* All RHEL machines for prior major releases are deprecated */ ++ mc->deprecation_reason = rhel_old_machine_deprecation; ++} ++DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", false); ++ ++static void ccw_machine_rhel850_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; ++ ++ ccw_machine_rhel860_instance_options(machine); ++ ++ s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); ++ ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_BEAR_ENH); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_RDP); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAI); ++} ++ ++static void ccw_machine_rhel850_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel860_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ mc->smp_props.prefer_sockets = true; ++} ++DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); ++ ++static void ccw_machine_rhel840_instance_options(MachineState *machine) ++{ ++ ccw_machine_rhel850_instance_options(machine); ++} ++ ++static void ccw_machine_rhel840_class_options(MachineClass *mc) ++{ ++ 
ccw_machine_rhel850_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); ++} ++DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", false); ++ ++static void ccw_machine_rhel820_instance_options(MachineState *machine) ++{ ++ ccw_machine_rhel840_instance_options(machine); ++} ++ ++static void ccw_machine_rhel820_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel840_class_options(mc); ++ mc->fixup_ram_size = s390_fixup_ram_size; ++ /* we did not publish a rhel8.3.0 machine */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); ++} ++DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", false); ++ ++static void ccw_machine_rhel760_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V3_1 }; ++ ++ ccw_machine_rhel820_instance_options(machine); ++ ++ s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); ++ ++ /* The multiple-epoch facility was not available with rhel7.6.0 on z14GA1 */ ++ s390_cpudef_featoff(14, 1, S390_FEAT_MULTIPLE_EPOCH); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QSIE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QTOUE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOUE); ++} ++ ++static void ccw_machine_rhel760_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel820_class_options(mc); ++ /* We never published the s390x version of RHEL-AV 8.0 and 8.1, so add this here */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++} ++DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false); + + static void ccw_machine_register_types(void) + { +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 
c90a19b4d1..bf59275f18 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -470,4 +470,6 @@ extern const size_t hw_compat_rhel_8_0_len; + extern GlobalProperty hw_compat_rhel_7_6[]; + extern const size_t hw_compat_rhel_7_6_len; + ++extern const char *rhel_old_machine_deprecation; ++ + #endif +-- +2.31.1 + diff --git a/0010-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch new file mode 100644 index 0000000..fbb8841 --- /dev/null +++ b/0010-Add-s390x-machine-types.patch @@ -0,0 +1,165 @@ +From 4ad9a0d0582eef78946b47563eb2c5b7ddf0cbb0 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:47:32 +0200 +Subject: Add s390x machine types + +Adding changes to add RHEL machine types for s390x architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (weekly-4.1.0): +- Use upstream compat handling + +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + +Merged patches (3.1.0): +- 29df663 s390x/cpumodel: default enable bpb and ppa15 for z196 and later + +Merged patches (4.1.0): +- 6c200d665b hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 + +Merged patches (4.2.0): +- fb192e5 redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to s390-ccw-virtio-rhel8.1.0 +- a9b22e8 redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine +- hw/s390x: Add the s390-ccw-virtio-rhel8.2.0 machine types (patch 92954) + +Merged patches (weekly-201216): +- a6ae745cce redhat: s390x: add rhel-8.4.0 compat machine + +Merged patches (weekly-210602): +- 50835d3429 redhat: s390x: add rhel-8.5.0 compat machine + +Merged patches (weekly-211006): +- a3bcde27fe redhat: Add s390x machine type compatibility update for 6.1 rebase +--- + hw/s390x/s390-virtio-ccw.c | 99 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 98 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 653587ea62..181856e6cf 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ 
b/hw/s390x/s390-virtio-ccw.c +@@ -767,7 +767,7 @@ bool css_migration_enabled(void) + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + ccw_machine_##suffix##_class_options(mc); \ +- mc->desc = "VirtIO-ccw based S390 machine v" verstr; \ ++ mc->desc = "VirtIO-ccw based S390 machine " verstr; \ + if (latest) { \ + mc->alias = "s390-ccw-virtio"; \ + mc->is_default = true; \ +@@ -791,6 +791,7 @@ bool css_migration_enabled(void) + } \ + type_init(ccw_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void ccw_machine_6_2_instance_options(MachineState *machine) + { + } +@@ -1100,6 +1101,102 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + } + DEFINE_CCW_MACHINE(2_4, "2.4", false); ++#endif ++ ++static void ccw_machine_rhel850_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel850_class_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++} ++DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); ++ ++static void ccw_machine_rhel840_instance_options(MachineState *machine) ++{ ++ ccw_machine_rhel850_instance_options(machine); ++} ++ ++static void ccw_machine_rhel840_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel850_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); ++} ++DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", false); ++ ++static void ccw_machine_rhel820_instance_options(MachineState *machine) ++{ ++ ccw_machine_rhel840_instance_options(machine); ++} ++ ++static void ccw_machine_rhel820_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel840_class_options(mc); ++ mc->fixup_ram_size = s390_fixup_ram_size; ++ /* we did not publish a rhel8.3.0 machine */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, 
hw_compat_rhel_8_2_len); ++} ++DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", false); ++ ++static void ccw_machine_rhel760_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V3_1 }; ++ ++ ccw_machine_rhel820_instance_options(machine); ++ ++ s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); ++ ++ /* The multiple-epoch facility was not available with rhel7.6.0 on z14GA1 */ ++ s390_cpudef_featoff(14, 1, S390_FEAT_MULTIPLE_EPOCH); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QSIE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QTOUE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOUE); ++} ++ ++static void ccw_machine_rhel760_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel820_class_options(mc); ++ /* We never published the s390x version of RHEL-AV 8.0 and 8.1, so add this here */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++} ++DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false); ++ ++static void ccw_machine_rhel750_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 }; ++ ccw_machine_rhel760_instance_options(machine); ++ ++ /* before 2.12 we emulated the very first z900, and RHEL 7.5 is ++ based on 2.10 */ ++ s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); ++ ++ /* bpb and ppa15 were only in the full model in RHEL 7.5 */ ++ s390_cpudef_featoff_greater(11, 1, S390_FEAT_PPA15); ++ s390_cpudef_featoff_greater(11, 1, S390_FEAT_BPB); ++} ++ ++GlobalProperty ccw_compat_rhel_7_5[] = { ++ { ++ .driver = TYPE_SCLP_EVENT_FACILITY, ++ .property = "allow_all_mask_sizes", ++ .value = "off", ++ }, ++}; ++const size_t ccw_compat_rhel_7_5_len = G_N_ELEMENTS(ccw_compat_rhel_7_5); ++ ++static void 
ccw_machine_rhel750_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel760_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(mc->compat_props, ccw_compat_rhel_7_5, ccw_compat_rhel_7_5_len); ++ S390_CCW_MACHINE_CLASS(mc)->hpage_1m_allowed = false; ++} ++DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); + + static void ccw_machine_register_types(void) + { +-- +2.27.0 + diff --git a/0010-Add-x86_64-machine-types.patch b/0010-Add-x86_64-machine-types.patch new file mode 100644 index 0000000..7c48967 --- /dev/null +++ b/0010-Add-x86_64-machine-types.patch @@ -0,0 +1,714 @@ +From 427a575ca57966bc72e1ebf218081da530d435d7 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:10:31 +0200 +Subject: Add x86_64 machine types + +Adding changes to add RHEL machine types for x86_64 architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (6.1.0): +- Update qemu64 cpu spec + +Rebase notes (7.0.0): +- Reset alias for all machine-types except latest one + +Merged patches (6.1.0): +- 59c284ad3b x86: Add x86 rhel8.5 machine types +- a8868b42fe redhat: x86: Enable 'kvm-asyncpf-int' by default +- a3995e2eff Remove RHEL 7.0.0 machine type (only x86_64 changes) +- ad3190a79b Remove RHEL 7.1.0 machine type (only x86_64 changes) +- 84bbe15d4e Remove RHEL 7.2.0 machine type (only x86_64 changes) +- 0215eb3356 Remove RHEL 7.3.0 machine types (only x86_64 changes) +- af69d1ca6e Remove RHEL 7.4.0 machine types (only x86_64 changes) +- 8f7a74ab78 Remove RHEL 7.5.0 machine types (only x86_64 changes) + +Merged patches (7.0.0): +- eae7d8dd3c x86/rhel machine types: Add pc_rhel_8_5_compat +- 6762f56469 x86/rhel machine types: Wire compat into q35 and i440fx +- 5762101438 rhel machine types/x86: set prefer_sockets +- 9ba9ddc632 x86: Add q35 RHEL 8.6.0 machine type +- 6110d865e5 x86: Add q35 RHEL 9.0.0 machine type +- dcc64971bf RHEL: mark old machine types as deprecated (partialy) +- 6b396f182b 
RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 +--- + hw/core/machine.c | 10 ++ + hw/i386/pc.c | 135 +++++++++++++++++++++- + hw/i386/pc_piix.c | 79 ++++++++++++- + hw/i386/pc_q35.c | 227 ++++++++++++++++++++++++++++++++++++- + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 5 + + include/hw/i386/pc.h | 24 ++++ + target/i386/kvm/kvm-cpu.c | 1 + + target/i386/kvm/kvm.c | 4 + + tests/qtest/pvpanic-test.c | 5 +- + 10 files changed, 484 insertions(+), 7 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 77202a3570..28989b6e7b 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -43,6 +43,16 @@ + const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++GlobalProperty hw_compat_rhel_8_6[] = { ++ /* hw_compat_rhel_8_6 bz 2065589 */ ++ /* ++ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so ++ * we need to disable it downstream on the latest hw_compat_rhel_8. ++ */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++}; ++const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index fd55fc725c..263d882af6 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -375,6 +375,137 @@ GlobalProperty pc_compat_1_4[] = { + }; + const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + ++/* This macro is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. 
++ */ ++GlobalProperty pc_rhel_compat[] = { ++ { TYPE_X86_CPU, "host-phys-bits", "on" }, ++ { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, ++ { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, ++ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, ++ /* bz 1508330 */ ++ { "vfio-pci", "x-no-geforce-quirks", "on" }, ++ /* bz 1941397 */ ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "on" }, ++}; ++const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); ++ ++GlobalProperty pc_rhel_8_5_compat[] = { ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "model", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, ++ ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-major", "0x0006" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" }, ++}; ++const size_t pc_rhel_8_5_compat_len = G_N_ELEMENTS(pc_rhel_8_5_compat); ++ ++GlobalProperty pc_rhel_8_4_compat[] = { ++ /* pc_rhel_8_4_compat from pc_compat_5_2 */ ++ { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "off" }, ++}; ++const size_t pc_rhel_8_4_compat_len = G_N_ELEMENTS(pc_rhel_8_4_compat); ++ ++GlobalProperty pc_rhel_8_3_compat[] = { ++ /* pc_rhel_8_3_compat from pc_compat_5_1 */ ++ { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, ++}; ++const size_t pc_rhel_8_3_compat_len = G_N_ELEMENTS(pc_rhel_8_3_compat); ++ ++GlobalProperty pc_rhel_8_2_compat[] = { ++ /* pc_rhel_8_2_compat from pc_compat_4_2 */ ++ { "mch", "smbase-smram", 
"off" }, ++}; ++const size_t pc_rhel_8_2_compat_len = G_N_ELEMENTS(pc_rhel_8_2_compat); ++ ++/* pc_rhel_8_1_compat is empty since pc_4_1_compat is */ ++GlobalProperty pc_rhel_8_1_compat[] = { }; ++const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); ++ ++GlobalProperty pc_rhel_8_0_compat[] = { ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "intel-iommu", "dma-drain", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /** The mpx=on entries from pc_compat_3_1 are in pc_rhel_7_6_compat **/ ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" }, ++}; ++const size_t pc_rhel_8_0_compat_len = G_N_ELEMENTS(pc_rhel_8_0_compat); ++ ++/* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but: ++ * all of the 2_12 stuff was already in 7.6 from bz 1481253 ++ * 
x-migrate-smi-count comes from PC_COMPAT_2_11 but ++ * is really tied to kernel version so keep it off on 7.x ++ * machine types irrespective of host. ++ */ ++GlobalProperty pc_rhel_7_6_compat[] = { ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { TYPE_X86_CPU, "x-migrate-smi-count", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++}; ++const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); ++ ++/* ++ * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine ++ * types as the PC_COMPAT_* do for upstream types. ++ * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. 
++ */ ++ + GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) + { + GSIState *s; +@@ -1738,6 +1869,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->pvh_enabled = true; + pcmc->kvmclock_create_always = true; + assert(!mc->get_hotplug_handler); ++ mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; + mc->hotplug_allowed = pc_hotplug_allowed; + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; +@@ -1748,7 +1880,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->has_hotpluggable_cpus = true; + mc->default_boot_order = "cad"; + mc->block_default_type = IF_IDE; +- mc->max_cpus = 255; ++ /* 240: max CPU count for RHEL */ ++ mc->max_cpus = 240; + mc->reset = pc_machine_reset; + mc->wakeup = pc_machine_wakeup; + hc->pre_plug = pc_machine_device_pre_plug_cb; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index c797e98312..0cacc0d623 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -50,6 +50,7 @@ + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "sysemu/xen.h" ++#include "migration/migration.h" + #ifdef CONFIG_XEN + #include + #include "hw/xen/xen_pt.h" +@@ -174,8 +175,8 @@ static void pc_init1(MachineState *machine, + if (pcmc->smbios_defaults) { + MachineClass *mc = MACHINE_GET_CLASS(machine); + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -314,6 +315,7 @@ static void pc_init1(MachineState *machine, + * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). 
+ */ + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_compat_2_3_fn(MachineState *machine) + { + X86MachineState *x86ms = X86_MACHINE(machine); +@@ -967,3 +969,76 @@ static void xenfv_3_1_machine_options(MachineClass *m) + DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, + xenfv_3_1_machine_options); + #endif ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel7 machine type */ ++static void pc_machine_rhel7_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ m->family = "pc_piix_Y"; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ pcmc->default_nic_model = "e1000"; ++ pcmc->pci_root_uid = 0; ++ m->default_display = "std"; ++ m->no_parallel = 1; ++ m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++ m->alias = "pc"; ++ m->is_default = 1; ++ m->smp_props.prefer_sockets = true; ++} ++ ++static void pc_init_rhel760(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel760_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel7_options(m); ++ m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; ++ m->async_pf_vmexit_disable = true; ++ m->smbus_no_migration_support = true; ++ ++ /* All RHEL machines for prior major releases are deprecated */ ++ m->deprecation_reason = rhel_old_machine_deprecation; ++ ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ pcmc->kvmclock_create_always = false; ++ /* From pc_i440fx_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_5, 
++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, ++ pc_machine_rhel760_options); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index b695f88c45..157160e069 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -342,6 +342,7 @@ static void pc_q35_init(MachineState *machine) + DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) + + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void 
pc_q35_machine_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +@@ -631,3 +632,225 @@ static void pc_q35_2_4_machine_options(MachineClass *m) + + DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, + pc_q35_2_4_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel q35 machine type */ ++static void pc_q35_machine_rhel_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pcmc->default_nic_model = "e1000e"; ++ pcmc->pci_root_uid = 0; ++ m->family = "pc_q35_Z"; ++ m->units_per_default_bus = 1; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ m->default_display = "std"; ++ m->no_floppy = 1; ++ m->no_parallel = 1; ++ pcmc->default_cpu_version = 1; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); ++ m->alias = "q35"; ++ m->max_cpus = 710; ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++} ++ ++static void pc_q35_init_rhel900(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel900_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL"; ++ pcmc->smbios_stream_version = "9.0.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900, ++ pc_q35_machine_rhel900_options); ++ ++static void pc_q35_init_rhel860(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel860_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel900_options(m); ++ m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ ++ /* All RHEL machines for prior major 
releases are deprecated */ ++ m->deprecation_reason = rhel_old_machine_deprecation; ++ ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.6.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, ++ pc_q35_machine_rhel860_options); ++ ++ ++static void pc_q35_init_rhel850(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel850_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel860_options(m); ++ m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.5.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); ++ m->smp_props.prefer_sockets = true; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, ++ pc_q35_machine_rhel850_options); ++ ++ ++static void pc_q35_init_rhel840(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel840_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel850_options(m); ++ m->desc = "RHEL-8.4.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.4.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel840, "pc-q35-rhel8.4.0", pc_q35_init_rhel840, ++ pc_q35_machine_rhel840_options); ++ ++ ++static void pc_q35_init_rhel830(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel830_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = 
PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel840_options(m); ++ m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.3.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->kvmclock_create_always = false; ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830, ++ pc_q35_machine_rhel830_options); ++ ++static void pc_q35_init_rhel820(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel820_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel830_options(m); ++ m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.2.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, ++ pc_q35_machine_rhel820_options); ++ ++static void pc_q35_init_rhel810(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel810_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel820_options(m); ++ m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ pcmc->smbios_stream_product = NULL; ++ pcmc->smbios_stream_version = NULL; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); ++} ++ 
++DEFINE_PC_MACHINE(q35_rhel810, "pc-q35-rhel8.1.0", pc_q35_init_rhel810, ++ pc_q35_machine_rhel810_options); ++ ++static void pc_q35_init_rhel800(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel800_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel810_options(m); ++ m->desc = "RHEL-8.0.0 PC (Q35 + ICH9, 2009)"; ++ m->smbus_no_migration_support = true; ++ m->alias = NULL; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel800, "pc-q35-rhel8.0.0", pc_q35_init_rhel800, ++ pc_q35_machine_rhel800_options); ++ ++static void pc_q35_init_rhel760(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel760_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel800_options(m); ++ m->alias = NULL; ++ m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; ++ m->async_pf_vmexit_disable = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, ++ pc_q35_machine_rhel760_options); +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index ec4176a1e0..465a2a09d2 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1136,6 +1136,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) + static void ccw_machine_rhel860_class_options(MachineClass *mc) + { + ccw_machine_rhel900_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); + + /* All RHEL machines for prior major releases are deprecated */ + mc->deprecation_reason = 
rhel_old_machine_deprecation; +diff --git a/include/hw/boards.h b/include/hw/boards.h +index bf59275f18..d1555665df 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -266,6 +266,8 @@ struct MachineClass { + strList *allowed_dynamic_sysbus_devices; + bool auto_enable_numa_with_memhp; + bool auto_enable_numa_with_memdev; ++ /* RHEL only */ ++ bool async_pf_vmexit_disable; + bool ignore_boot_device_suffixes; + bool smbus_no_migration_support; + bool nvdimm_supported; +@@ -449,6 +451,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_6[]; ++extern const size_t hw_compat_rhel_8_6_len; ++ + extern GlobalProperty hw_compat_rhel_8_5[]; + extern const size_t hw_compat_rhel_8_5_len; + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 91331059d9..419a6ec24b 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -289,6 +289,30 @@ extern const size_t pc_compat_1_5_len; + extern GlobalProperty pc_compat_1_4[]; + extern const size_t pc_compat_1_4_len; + ++extern GlobalProperty pc_rhel_compat[]; ++extern const size_t pc_rhel_compat_len; ++ ++extern GlobalProperty pc_rhel_8_5_compat[]; ++extern const size_t pc_rhel_8_5_compat_len; ++ ++extern GlobalProperty pc_rhel_8_4_compat[]; ++extern const size_t pc_rhel_8_4_compat_len; ++ ++extern GlobalProperty pc_rhel_8_3_compat[]; ++extern const size_t pc_rhel_8_3_compat_len; ++ ++extern GlobalProperty pc_rhel_8_2_compat[]; ++extern const size_t pc_rhel_8_2_compat_len; ++ ++extern GlobalProperty pc_rhel_8_1_compat[]; ++extern const size_t pc_rhel_8_1_compat_len; ++ ++extern GlobalProperty pc_rhel_8_0_compat[]; ++extern const size_t pc_rhel_8_0_compat_len; ++ ++extern GlobalProperty pc_rhel_7_6_compat[]; ++extern const size_t pc_rhel_7_6_compat_len; ++ + /* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. 
+ */ +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index 5eb955ce9a..74c1396a93 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -137,6 +137,7 @@ static PropValue kvm_default_props[] = { + { "acpi", "off" }, + { "monitor", "off" }, + { "svm", "off" }, ++ { "kvm-pv-unhalt", "on" }, + { NULL, NULL }, + }; + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 9cf8e03669..6d1e009443 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -3488,6 +3488,7 @@ static int kvm_get_msrs(X86CPU *cpu) + struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; + int ret, i; + uint64_t mtrr_top_bits; ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + + kvm_msr_buf_reset(cpu); + +@@ -3822,6 +3823,9 @@ static int kvm_get_msrs(X86CPU *cpu) + break; + case MSR_KVM_ASYNC_PF_EN: + env->async_pf_en_msr = msrs[i].data; ++ if (mc->async_pf_vmexit_disable) { ++ env->async_pf_en_msr &= ~(1ULL << 2); ++ } + break; + case MSR_KVM_ASYNC_PF_INT: + env->async_pf_int_msr = msrs[i].data; +diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c +index 6dcad2db49..580c2c43d2 100644 +--- a/tests/qtest/pvpanic-test.c ++++ b/tests/qtest/pvpanic-test.c +@@ -17,7 +17,7 @@ static void test_panic_nopause(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic -action panic=none"); ++ qts = qtest_init("-M q35 -device pvpanic -action panic=none"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); +@@ -40,7 +40,8 @@ static void test_panic(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic -action panic=pause"); ++ /* RHEL: Use q35 */ ++ qts = qtest_init("-M q35 -device pvpanic -action panic=pause"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); +-- +2.31.1 + diff --git a/0011-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch new file mode 100644 index 0000000..2702772 --- /dev/null 
+++ b/0011-Add-x86_64-machine-types.patch @@ -0,0 +1,1276 @@ +From c2b3564ce466bc5069bf9f5b0694025c68b0858d Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:10:31 +0200 +Subject: Add x86_64 machine types + +Adding changes to add RHEL machine types for x86_64 architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (qemu-4.0.0): +- Use upstream compat handling + +Rebase notes (3.1.0): +- Removed xsave changes + +Rebase notes (4.1.0): +- Updated format for compat structures + +Rebase notes (4.2.0-rc2): +- Use X86MachineClass for save_tsc_khz (upstream change) + +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + +Rebase notes (weekly-210519): +- kvm_default_props moved to new file (upstream) + +Rebase notes (6.2.0-rc0): +- linuxboot_dma_enabled moved to X86MachineState + +Merged patches (4.1.0): +- f4dc802 pc: 7.5 compat entries +- 456ed3e pc: PC_RHEL7_6_COMPAT +- 04119ee pc: Add compat for pc-i440fx-rhel7.6.0 machine type +- b3b3687 pc: Add pc-q35-8.0.0 machine type +- 8d46fc6 pc: Add x-migrate-smi-count=off to PC_RHEL7_6_COMPAT +- 1de7949 kvm: clear out KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT for older machine types +- 18cf0d7 target/i386: Disable MPX support on named CPU models (partialy) +- 2660667 rhel: Set host-phys-bits-limit=48 on rhel machine-types + +Merged patches (4.2.0): +- 7d5c2ef pc: Don't make die-id mandatory unless necessary +- e42808c x86 machine types: pc_rhel_8_0_compat +- 9de83a8 x86 machine types: q35: Fixup units_per_default_bus +- 6df1559 x86 machine types: Fixup dynamic sysbus entries +- 0784125 x86 machine types: add pc-q35-rhel8.1.0 +- machines/x86: Add rhel 8.2 machine type (patch 92959) + +Merged patches (5.1.0): +- 481357e RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR support +- e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) + +Merged patches (5.2.0 rc0): +- b02c9f5 x86: Add 8.3.0 x86_64 machine type +- f2edc4f q35: Set max_cpus to 512 +- 6d7ba66 
machine types/numa: set numa_mem_supported on old machine types (partialy) +- 25c5644 machine_types/numa: compatibility for auto_enable_numa_with_memdev (partialy) +- e2d3209 x86: lpc9: let firmware negotiate 'CPU hotplug with SMI' features (partialy) + +Merged patches (weekly-210120): +- d0afeaa0c4 RHEL: Switch pvpanic test to q35 +- e19cdad83c 8.4 x86 machine type + +Merged patches (weekly-210203): +- 96f8781bd6 q35: Increase max_cpus to 710 on pc-q35-rhel8* machine types + +Merged patches (weekly-210224): +- 70d3924521 redhat: Add some devices for exporting upstream machine types + - machine type chunks only + +Merged patches (6.0.0 rc0): +- 031c690804 i386/acpi: restore device paths for pre-5.1 vms + +Merged patches (weekly-210623): +- 64c350696f x86: Add x86 rhel8.5 machine types +- 1c8fe5e164 redhat: x86: Enable 'kvm-asyncpf-int' by default + +Merged patches (weekly-210714): +- 618e2424ed redhat: Expose upstream machines pc-4.2 and pc-2.11 +- c4d1aa8bf2 redhat: Enable FDC device for upstream machines too +- 66882f9a32 redhat: Add hw_compat_4_2_extra and apply to upstream machines + +Fix machine type +--- + hw/block/fdc.c | 5 +- + hw/i386/acpi-build.c | 3 + + hw/i386/pc.c | 298 ++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 274 +++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 234 ++++++++++++++++++++++++++++- + include/hw/boards.h | 2 + + include/hw/i386/pc.h | 45 ++++++ + target/i386/kvm/kvm-cpu.c | 1 + + target/i386/kvm/kvm.c | 4 + + tests/qtest/pvpanic-test.c | 5 +- + 10 files changed, 862 insertions(+), 9 deletions(-) + +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 97fa6de423..63042ef030 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -2341,7 +2341,10 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) + + /* Restricted for Red Hat Enterprise Linux: */ + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- if (!strstr(mc->name, "-rhel7.")) { ++ if (!strstr(mc->name, "-rhel7.") && ++ /* 
Exported two upstream machine types allows FDC too */ ++ strcmp(mc->name, "pc-i440fx-4.2") && ++ strcmp(mc->name, "pc-i440fx-2.11")) { + error_setg(errp, "Device %s is not supported with machine type %s", + object_get_typename(OBJECT(dev)), mc->name); + return; +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index a99c6e4fe3..447ea35275 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -230,6 +230,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) + pm->fadt.reset_reg = r; + pm->fadt.reset_val = 0xf; + pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; ++ if (object_property_get_bool(lpc, ++ "__com.redhat_force-rev1-fadt", NULL)) ++ pm->fadt.rev = 1; + pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; + pm->smi_on_cpuhp = + !!(smi_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT)); +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index a2ef40ecbc..e8109954ca 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -371,6 +371,296 @@ GlobalProperty pc_compat_1_4[] = { + }; + const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + ++/* This macro is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. 
++ */ ++GlobalProperty pc_rhel_compat[] = { ++ { TYPE_X86_CPU, "host-phys-bits", "on" }, ++ { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, ++ { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, ++ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, ++ /* bz 1508330 */ ++ { "vfio-pci", "x-no-geforce-quirks", "on" }, ++ /* bz 1941397 */ ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "on" }, ++}; ++const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); ++ ++GlobalProperty pc_rhel_8_4_compat[] = { ++ /* pc_rhel_8_4_compat from pc_compat_5_2 */ ++ { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "off" }, ++}; ++const size_t pc_rhel_8_4_compat_len = G_N_ELEMENTS(pc_rhel_8_4_compat); ++ ++GlobalProperty pc_rhel_8_3_compat[] = { ++ /* pc_rhel_8_3_compat from pc_compat_5_1 */ ++ { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, ++}; ++const size_t pc_rhel_8_3_compat_len = G_N_ELEMENTS(pc_rhel_8_3_compat); ++ ++GlobalProperty pc_rhel_8_2_compat[] = { ++ /* pc_rhel_8_2_compat from pc_compat_4_2 */ ++ { "mch", "smbase-smram", "off" }, ++}; ++const size_t pc_rhel_8_2_compat_len = G_N_ELEMENTS(pc_rhel_8_2_compat); ++ ++/* pc_rhel_8_1_compat is empty since pc_4_1_compat is */ ++GlobalProperty pc_rhel_8_1_compat[] = { }; ++const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); ++ ++GlobalProperty pc_rhel_8_0_compat[] = { ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "intel-iommu", "dma-drain", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* 
pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /** The mpx=on entries from pc_compat_3_1 are in pc_rhel_7_6_compat **/ ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" }, ++}; ++const size_t pc_rhel_8_0_compat_len = G_N_ELEMENTS(pc_rhel_8_0_compat); ++ ++/* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but: ++ * all of the 2_12 stuff was already in 7.6 from bz 1481253 ++ * x-migrate-smi-count comes from PC_COMPAT_2_11 but ++ * is really tied to kernel version so keep it off on 7.x ++ * machine types irrespective of host. 
++ */ ++GlobalProperty pc_rhel_7_6_compat[] = { ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { TYPE_X86_CPU, "x-migrate-smi-count", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++}; ++const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); ++ ++/* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: ++ * - x-hv-max-vps was backported to 7.5 ++ * - x-pci-hole64-fix was backported to 7.5 ++ */ ++GlobalProperty pc_rhel_7_5_compat[] = { ++ /* pc_rhel_7_5_compat from pc_compat_2_11 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "clflushopt", "off" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { TYPE_X86_CPU, "legacy-cache", "on" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { TYPE_X86_CPU, "topoext", "off" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { "EPYC-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { "EPYC-IBPB-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, ++}; ++const size_t 
pc_rhel_7_5_compat_len = G_N_ELEMENTS(pc_rhel_7_5_compat); ++ ++GlobalProperty pc_rhel_7_4_compat[] = { ++ /* pc_rhel_7_4_compat from pc_compat_2_9 */ ++ { "mch", "extended-tseg-mbytes", stringify(0) }, ++ /* bz 1489800 */ ++ { "ICH9-LPC", "__com.redhat_force-rev1-fadt", "on" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { "i440FX-pcihost", "x-pci-hole64-fix", "off" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { "q35-pcihost", "x-pci-hole64-fix", "off" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { TYPE_X86_CPU, "x-hv-max-vps", "0x40" }, ++}; ++const size_t pc_rhel_7_4_compat_len = G_N_ELEMENTS(pc_rhel_7_4_compat); ++ ++GlobalProperty pc_rhel_7_3_compat[] = { ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "kvmclock", "x-mach-use-reliable-get-clock", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { TYPE_X86_CPU, "l3-cache", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { TYPE_X86_CPU, "full-cpuid-auto-level", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "family", "15" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "model", "6" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "stepping", "1" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "isa-pcspk", "migrate", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_6 */ ++ { TYPE_X86_CPU, "cpuid-0xb", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "ICH9-LPC", "x-smi-broadcast", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { TYPE_X86_CPU, "vmware-cpuid-freq", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "Haswell-" TYPE_X86_CPU, "stepping", "1" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_3 added in 2.9*/ ++ { TYPE_X86_CPU, "kvm-no-smi-migration", "on" }, ++}; ++const size_t pc_rhel_7_3_compat_len = G_N_ELEMENTS(pc_rhel_7_3_compat); ++ ++GlobalProperty pc_rhel_7_2_compat[] = { ++ { "phenom" "-" 
TYPE_X86_CPU, "rdtscp", "off"}, ++ { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" }, ++ { "qemu64" "-" TYPE_X86_CPU, "abm", "on" }, ++ { "Haswell-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" }, ++ { TYPE_X86_CPU, "check", "off" }, ++ { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" }, ++ { TYPE_X86_CPU, "arat", "off" }, ++ { "usb-redir", "streams", "off" }, ++ { TYPE_X86_CPU, "fill-mtrr-mask", "off" }, ++ { "apic-common", "legacy-instance-id", "on" }, ++}; ++const size_t pc_rhel_7_2_compat_len = G_N_ELEMENTS(pc_rhel_7_2_compat); ++ ++GlobalProperty pc_rhel_7_1_compat[] = { ++ { "kvm64" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "kvm32" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Conroe" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Penryn" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Nehalem" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Nehalem-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Westmere" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Westmere-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "SandyBridge" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "SandyBridge-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G1" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G2" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G3" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G4" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G5" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell" "-" 
TYPE_X86_CPU, "f16c", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "coreduo" "-" TYPE_X86_CPU, "vmx", "on" }, ++ { "core2duo" "-" TYPE_X86_CPU, "vmx", "on" }, ++ { "qemu64" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "kvm64" "-" TYPE_X86_CPU, "min-level", stringify(5) }, ++ { "pentium3" "-" TYPE_X86_CPU, "min-level", stringify(2) }, ++ { "n270" "-" TYPE_X86_CPU, "min-level", stringify(5) }, ++ { "Conroe" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "Penryn" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "Nehalem" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "n270" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Penryn" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Conroe" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Nehalem" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Westmere" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "SandyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "IvyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Haswell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Haswell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Broadwell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Broadwell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++}; ++const size_t pc_rhel_7_1_compat_len = G_N_ELEMENTS(pc_rhel_7_1_compat); ++ ++/* ++ * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine ++ * types as the PC_COMPAT_* do for upstream types. 
++ * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. ++ */ ++ ++/* ++ * RHEL-7 is based on QEMU 1.5.3, so this needs the PC_COMPAT_* ++ * between our base and 1.5, less stuff backported to RHEL-7.0 ++ * (usb-device.msos-desc), less stuff for devices we changed ++ * (qemu64-x86_64-cpu) or don't support (hpet, pci-serial-2x, ++ * pci-serial-4x) in 7.0. ++ */ ++GlobalProperty pc_rhel_7_0_compat[] = { ++ { "virtio-scsi-pci", "any_layout", "off" }, ++ { "PIIX4_PM", "memory-hotplug-support", "off" }, ++ { "apic", "version", stringify(0x11) }, ++ { "nec-usb-xhci", "superspeed-ports-first", "off" }, ++ { "nec-usb-xhci", "force-pcie-endcap", "on" }, ++ { "pci-serial", "prog_if", stringify(0) }, ++ { "virtio-net-pci", "guest_announce", "off" }, ++ { "ICH9-LPC", "memory-hotplug-support", "off" }, ++ { "xio3130-downstream", COMPAT_PROP_PCP, "off" }, ++ { "ioh3420", COMPAT_PROP_PCP, "off" }, ++ { "PIIX4_PM", "acpi-pci-hotplug-with-bridge-support", "off" }, ++ { "e1000", "mitigation", "off" }, ++ { "virtio-net-pci", "ctrl_guest_offloads", "off" }, ++ { "Conroe" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Penryn" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Nehalem" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Nehalem-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Westmere" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Westmere-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G1" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G2" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G3" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G4" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G5" "-" TYPE_X86_CPU, "x2apic", "on" }, ++}; ++const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat); ++ ++/* ++ * RHEL: These properties only apply to the RHEL exported machine types ++ * pc-4.2/2.11 for the purpose to have a limited upstream machines support ++ * which can be migrated to RHEL. 
Let's avoid touching hw_compat_4_2 directly ++ * so that we can have some isolation against the upstream code. ++ */ ++GlobalProperty hw_compat_4_2_extra[] = { ++ /* By default enlarge the default virtio-net-pci ROM to 512KB. */ ++ { "virtio-net-pci", "romsize", "0x80000" }, ++}; ++const size_t hw_compat_4_2_extra_len = G_N_ELEMENTS(hw_compat_4_2_extra); ++ + GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) + { + GSIState *s; +@@ -904,7 +1194,8 @@ void pc_memory_init(PCMachineState *pcms, + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); +- if (pcmc->pci_enabled) { ++ /* RH difference: See bz 1489800, explicitly make ROM ro */ ++ if (pcmc->pc_rom_ro) { + memory_region_set_readonly(option_rom_mr, true); + } + memory_region_add_subregion_overlap(rom_memory, +@@ -1694,6 +1985,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->pvh_enabled = true; + pcmc->kvmclock_create_always = true; + assert(!mc->get_hotplug_handler); ++ pcmc->pc_rom_ro = true; ++ mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; + mc->hotplug_allowed = pc_hotplug_allowed; + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; +@@ -1704,7 +1997,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->has_hotpluggable_cpus = true; + mc->default_boot_order = "cad"; + mc->block_default_type = IF_IDE; +- mc->max_cpus = 255; ++ /* 240: max CPU count for RHEL */ ++ mc->max_cpus = 240; + mc->reset = pc_machine_reset; + mc->wakeup = pc_machine_wakeup; + hc->pre_plug = pc_machine_device_pre_plug_cb; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index dda3f64f19..2885edffe9 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -50,6 +50,7 @@ + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "sysemu/xen.h" ++#include "migration/migration.h" + #ifdef CONFIG_XEN + #include + #include 
"hw/xen/xen_pt.h" +@@ -174,8 +175,8 @@ static void pc_init1(MachineState *machine, + if (pcmc->smbios_defaults) { + MachineClass *mc = MACHINE_GET_CLASS(machine); + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -314,6 +315,15 @@ static void pc_init1(MachineState *machine, + * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). + */ + ++/* ++ * NOTE! Not all the upstream machine types are disabled for RHEL. For ++ * providing a very limited support for upstream machine types, pc machines ++ * 2.11 and 4.2 are exposed explicitly. This will make the below "#if" macros ++ * a bit messed up, but please read this comment first so that we can have a ++ * rough understanding of what we're going to do. ++ */ ++ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_compat_2_3_fn(MachineState *machine) + { + X86MachineState *x86ms = X86_MACHINE(machine); +@@ -389,6 +399,8 @@ static void pc_xen_hvm_init(MachineState *machine) + } + #endif + ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ + #define DEFINE_I440FX_MACHINE(suffix, name, compatfn, optionfn) \ + static void pc_init_##suffix(MachineState *machine) \ + { \ +@@ -424,8 +436,10 @@ static void pc_i440fx_6_2_machine_options(MachineClass *m) + pcmc->default_cpu_version = 1; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v6_2, "pc-i440fx-6.2", NULL, + pc_i440fx_6_2_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_6_1_machine_options(MachineClass *m) + { +@@ -437,8 +451,10 @@ static void pc_i440fx_6_1_machine_options(MachineClass *m) + m->smp_props.prefer_sockets = true; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + 
DEFINE_I440FX_MACHINE(v6_1, "pc-i440fx-6.1", NULL, + pc_i440fx_6_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_6_0_machine_options(MachineClass *m) + { +@@ -449,8 +465,10 @@ static void pc_i440fx_6_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v6_0, "pc-i440fx-6.0", NULL, + pc_i440fx_6_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_2_machine_options(MachineClass *m) + { +@@ -461,8 +479,10 @@ static void pc_i440fx_5_2_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_5_2, pc_compat_5_2_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_2, "pc-i440fx-5.2", NULL, + pc_i440fx_5_2_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_1_machine_options(MachineClass *m) + { +@@ -477,8 +497,10 @@ static void pc_i440fx_5_1_machine_options(MachineClass *m) + pcmc->pci_root_uid = 1; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_1, "pc-i440fx-5.1", NULL, + pc_i440fx_5_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_0_machine_options(MachineClass *m) + { +@@ -491,8 +513,10 @@ static void pc_i440fx_5_0_machine_options(MachineClass *m) + m->auto_enable_numa_with_memdev = false; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_0, "pc-i440fx-5.0", NULL, + pc_i440fx_5_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_4_2_machine_options(MachineClass *m) + { +@@ -501,8 +525,21 @@ static void pc_i440fx_4_2_machine_options(MachineClass *m) + m->is_default = false; + compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len); + 
compat_props_add(m->compat_props, pc_compat_4_2, pc_compat_4_2_len); ++ ++ /* ++ * RHEL: Mark all upstream machines as deprecated because they're not ++ * supported by RHEL, even if exported. ++ */ ++ m->deprecation_reason = "Not supported by RHEL"; ++ /* ++ * RHEL: Specific compat properties to have limited support for upstream ++ * machines exported. ++ */ ++ compat_props_add(m->compat_props, hw_compat_4_2_extra, ++ hw_compat_4_2_extra_len); + } + ++/* RHEL: Export pc-4.2 */ + DEFINE_I440FX_MACHINE(v4_2, "pc-i440fx-4.2", NULL, + pc_i440fx_4_2_machine_options); + +@@ -515,8 +552,10 @@ static void pc_i440fx_4_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_4_1, pc_compat_4_1_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1", NULL, + pc_i440fx_4_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_4_0_machine_options(MachineClass *m) + { +@@ -529,8 +568,10 @@ static void pc_i440fx_4_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0", NULL, + pc_i440fx_4_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_3_1_machine_options(MachineClass *m) + { +@@ -546,8 +587,10 @@ static void pc_i440fx_3_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_3_1, pc_compat_3_1_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v3_1, "pc-i440fx-3.1", NULL, + pc_i440fx_3_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_3_0_machine_options(MachineClass *m) + { +@@ -556,8 +599,10 @@ static void pc_i440fx_3_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_3_0, pc_compat_3_0_len); + } + ++#if 0 /* Disabled 
for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v3_0, "pc-i440fx-3.0", NULL, + pc_i440fx_3_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_2_12_machine_options(MachineClass *m) + { +@@ -566,8 +611,10 @@ static void pc_i440fx_2_12_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_2_12, pc_compat_2_12_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v2_12, "pc-i440fx-2.12", NULL, + pc_i440fx_2_12_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_2_11_machine_options(MachineClass *m) + { +@@ -576,9 +623,11 @@ static void pc_i440fx_2_11_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_2_11, pc_compat_2_11_len); + } + ++/* RHEL: Export pc-2.11 */ + DEFINE_I440FX_MACHINE(v2_11, "pc-i440fx-2.11", NULL, + pc_i440fx_2_11_machine_options); + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_i440fx_2_10_machine_options(MachineClass *m) + { + pc_i440fx_2_11_machine_options(m); +@@ -951,3 +1000,224 @@ static void xenfv_3_1_machine_options(MachineClass *m) + DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, + xenfv_3_1_machine_options); + #endif ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel7 machine type */ ++static void pc_machine_rhel7_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ m->family = "pc_piix_Y"; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ pcmc->default_nic_model = "e1000"; ++ pcmc->pci_root_uid = 0; ++ m->default_display = "std"; ++ m->no_parallel = 1; ++ m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++ m->alias = "pc"; ++ 
m->is_default = 1; ++} ++ ++static void pc_init_rhel760(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel760_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel7_options(m); ++ m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; ++ m->async_pf_vmexit_disable = true; ++ m->smbus_no_migration_support = true; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ pcmc->kvmclock_create_always = false; ++ /* From pc_i440fx_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, ++ pc_machine_rhel760_options); ++ ++static void pc_init_rhel750(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel750_options(MachineClass *m) ++{ ++ pc_machine_rhel760_options(m); ++ 
m->alias = NULL; ++ m->is_default = 0; ++ m->desc = "RHEL 7.5.0 PC (i440FX + PIIX, 1996)"; ++ m->auto_enable_numa_with_memhp = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel750, "pc-i440fx-rhel7.5.0", pc_init_rhel750, ++ pc_machine_rhel750_options); ++ ++static void pc_init_rhel740(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel740_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel750_options(m); ++ m->desc = "RHEL 7.4.0 PC (i440FX + PIIX, 1996)"; ++ pcmc->pc_rom_ro = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel740, "pc-i440fx-rhel7.4.0", pc_init_rhel740, ++ pc_machine_rhel740_options); ++ ++static void pc_init_rhel730(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel730_options(MachineClass *m) ++{ ++ X86MachineClass *x86mc = X86_MACHINE_CLASS(m); ++ pc_machine_rhel740_options(m); ++ m->desc = "RHEL 7.3.0 PC (i440FX + PIIX, 1996)"; ++ x86mc->fwcfg_dma_enabled = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel730, "pc-i440fx-rhel7.3.0", pc_init_rhel730, ++ pc_machine_rhel730_options); ++ ++ ++static void pc_init_rhel720(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel720_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ X86MachineClass *x86mc = 
X86_MACHINE_CLASS(m); ++ pc_machine_rhel730_options(m); ++ m->desc = "RHEL 7.2.0 PC (i440FX + PIIX, 1996)"; ++ /* From pc_i440fx_2_5_machine_options */ ++ x86mc->save_tsc_khz = false; ++ m->legacy_fw_cfg_order = 1; ++ /* Note: broken_reserved_end was already in 7.2 */ ++ /* From pc_i440fx_2_6_machine_options */ ++ pcmc->legacy_cpu_hotplug = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); ++ compat_props_add(m->compat_props, pc_rhel_7_2_compat, pc_rhel_7_2_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel720, "pc-i440fx-rhel7.2.0", pc_init_rhel720, ++ pc_machine_rhel720_options); ++ ++static void pc_compat_rhel710(MachineState *machine) ++{ ++ PCMachineState *pcms = PC_MACHINE(machine); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ ++ /* From pc_compat_2_2 */ ++ pcmc->rsdp_in_ram = false; ++ machine->suppress_vmdesc = true; ++ ++ /* From pc_compat_2_1 */ ++ pcmc->smbios_uuid_encoded = false; ++ x86_cpu_change_kvm_default("svm", NULL); ++ pcmc->enforce_aligned_dimm = false; ++ ++ /* Disable all the extra subsections that were added in 2.2 */ ++ migrate_pre_2_2 = true; ++ ++ /* From pc_i440fx_2_4_machine_options */ ++ pcmc->broken_reserved_end = true; ++} ++ ++static void pc_init_rhel710(MachineState *machine) ++{ ++ pc_compat_rhel710(machine); ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel710_options(MachineClass *m) ++{ ++ pc_machine_rhel720_options(m); ++ m->family = "pc_piix_Y"; ++ m->desc = "RHEL 7.1.0 PC (i440FX + PIIX, 1996)"; ++ m->default_display = "cirrus"; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_1, hw_compat_rhel_7_1_len); ++ compat_props_add(m->compat_props, pc_rhel_7_1_compat, pc_rhel_7_1_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel710, "pc-i440fx-rhel7.1.0", pc_init_rhel710, ++ pc_machine_rhel710_options); ++ ++static void pc_compat_rhel700(MachineState *machine) ++{ ++ PCMachineState *pcms = PC_MACHINE(machine); ++ 
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ ++ pc_compat_rhel710(machine); ++ ++ /* Upstream enables it for everyone, we're a little more selective */ ++ x86_cpu_change_kvm_default("x2apic", NULL); ++ x86_cpu_change_kvm_default("svm", NULL); ++ pcmc->legacy_acpi_table_size = 6418; /* see pc_compat_2_0() */ ++ pcmc->smbios_legacy_mode = true; ++ pcmc->has_reserved_memory = false; ++ migrate_cve_2014_5263_xhci_fields = true; ++} ++ ++static void pc_init_rhel700(MachineState *machine) ++{ ++ pc_compat_rhel700(machine); ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel700_options(MachineClass *m) ++{ ++ pc_machine_rhel710_options(m); ++ m->family = "pc_piix_Y"; ++ m->desc = "RHEL 7.0.0 PC (i440FX + PIIX, 1996)"; ++ compat_props_add(m->compat_props, pc_rhel_7_0_compat, pc_rhel_7_0_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, ++ pc_machine_rhel700_options); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 235054a643..c67418b6a9 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -342,6 +342,7 @@ static void pc_q35_init(MachineState *machine) + DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) + + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_q35_machine_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +@@ -620,3 +621,232 @@ static void pc_q35_2_4_machine_options(MachineClass *m) + + DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, + pc_q35_2_4_machine_options); 
++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel q35 machine type */ ++static void pc_q35_machine_rhel_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pcmc->default_nic_model = "e1000e"; ++ pcmc->pci_root_uid = 0; ++ m->family = "pc_q35_Z"; ++ m->units_per_default_bus = 1; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ m->default_display = "std"; ++ m->no_floppy = 1; ++ m->no_parallel = 1; ++ pcmc->default_cpu_version = 1; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); ++ m->alias = "q35"; ++ m->max_cpus = 710; ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++} ++ ++static void pc_q35_init_rhel850(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel850_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.5.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, ++ pc_q35_machine_rhel850_options); ++ ++ ++static void pc_q35_init_rhel840(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel840_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel850_options(m); ++ m->desc = "RHEL-8.4.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.4.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); ++} ++ 
++DEFINE_PC_MACHINE(q35_rhel840, "pc-q35-rhel8.4.0", pc_q35_init_rhel840, ++ pc_q35_machine_rhel840_options); ++ ++ ++static void pc_q35_init_rhel830(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel830_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel840_options(m); ++ m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.3.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->kvmclock_create_always = false; ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830, ++ pc_q35_machine_rhel830_options); ++ ++static void pc_q35_init_rhel820(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel820_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel830_options(m); ++ m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++ m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.2.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, ++ pc_q35_machine_rhel820_options); ++ ++static void pc_q35_init_rhel810(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel810_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel820_options(m); ++ m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; 
++ pcmc->smbios_stream_product = NULL; ++ pcmc->smbios_stream_version = NULL; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel810, "pc-q35-rhel8.1.0", pc_q35_init_rhel810, ++ pc_q35_machine_rhel810_options); ++ ++static void pc_q35_init_rhel800(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel800_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel810_options(m); ++ m->desc = "RHEL-8.0.0 PC (Q35 + ICH9, 2009)"; ++ m->smbus_no_migration_support = true; ++ m->alias = NULL; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel800, "pc-q35-rhel8.0.0", pc_q35_init_rhel800, ++ pc_q35_machine_rhel800_options); ++ ++static void pc_q35_init_rhel760(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel760_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel800_options(m); ++ m->alias = NULL; ++ m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; ++ m->async_pf_vmexit_disable = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, ++ pc_q35_machine_rhel760_options); ++ ++static void pc_q35_init_rhel750(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel750_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel760_options(m); ++ m->alias = NULL; ++ m->desc = "RHEL-7.5.0 PC (Q35 + ICH9, 2009)"; ++ 
m->auto_enable_numa_with_memhp = false; ++ pcmc->default_nic_model = "e1000"; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel750, "pc-q35-rhel7.5.0", pc_q35_init_rhel750, ++ pc_q35_machine_rhel750_options); ++ ++static void pc_q35_init_rhel740(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel740_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel750_options(m); ++ m->desc = "RHEL-7.4.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->pc_rom_ro = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel740, "pc-q35-rhel7.4.0", pc_q35_init_rhel740, ++ pc_q35_machine_rhel740_options); ++ ++static void pc_q35_init_rhel730(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel730_options(MachineClass *m) ++{ ++ X86MachineClass *x86mc = X86_MACHINE_CLASS(m); ++ pc_q35_machine_rhel740_options(m); ++ m->desc = "RHEL-7.3.0 PC (Q35 + ICH9, 2009)"; ++ m->max_cpus = 255; ++ x86mc->fwcfg_dma_enabled = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, ++ pc_q35_machine_rhel730_options); +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 8bba96ef2b..04e8759815 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -263,6 +263,8 @@ struct MachineClass { + strList *allowed_dynamic_sysbus_devices; + bool auto_enable_numa_with_memhp; + bool auto_enable_numa_with_memdev; ++ /* RHEL only */ ++ bool async_pf_vmexit_disable; + bool ignore_boot_device_suffixes; + 
bool smbus_no_migration_support; + bool nvdimm_supported; +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 7ccc9a1a07..d0544ee119 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -125,6 +125,9 @@ struct PCMachineClass { + + /* create kvmclock device even when KVM PV features are not exposed */ + bool kvmclock_create_always; ++ ++ /* RH only, see bz 1489800 */ ++ bool pc_rom_ro; + }; + + #define TYPE_PC_MACHINE "generic-pc-machine" +@@ -280,6 +283,48 @@ extern const size_t pc_compat_1_5_len; + extern GlobalProperty pc_compat_1_4[]; + extern const size_t pc_compat_1_4_len; + ++extern GlobalProperty pc_rhel_compat[]; ++extern const size_t pc_rhel_compat_len; ++ ++extern GlobalProperty pc_rhel_8_4_compat[]; ++extern const size_t pc_rhel_8_4_compat_len; ++ ++extern GlobalProperty pc_rhel_8_3_compat[]; ++extern const size_t pc_rhel_8_3_compat_len; ++ ++extern GlobalProperty pc_rhel_8_2_compat[]; ++extern const size_t pc_rhel_8_2_compat_len; ++ ++extern GlobalProperty pc_rhel_8_1_compat[]; ++extern const size_t pc_rhel_8_1_compat_len; ++ ++extern GlobalProperty pc_rhel_8_0_compat[]; ++extern const size_t pc_rhel_8_0_compat_len; ++ ++extern GlobalProperty pc_rhel_7_6_compat[]; ++extern const size_t pc_rhel_7_6_compat_len; ++ ++extern GlobalProperty pc_rhel_7_5_compat[]; ++extern const size_t pc_rhel_7_5_compat_len; ++ ++extern GlobalProperty pc_rhel_7_4_compat[]; ++extern const size_t pc_rhel_7_4_compat_len; ++ ++extern GlobalProperty pc_rhel_7_3_compat[]; ++extern const size_t pc_rhel_7_3_compat_len; ++ ++extern GlobalProperty pc_rhel_7_2_compat[]; ++extern const size_t pc_rhel_7_2_compat_len; ++ ++extern GlobalProperty pc_rhel_7_1_compat[]; ++extern const size_t pc_rhel_7_1_compat_len; ++ ++extern GlobalProperty pc_rhel_7_0_compat[]; ++extern const size_t pc_rhel_7_0_compat_len; ++ ++extern GlobalProperty hw_compat_4_2_extra[]; ++extern const size_t hw_compat_4_2_extra_len; ++ + /* Helper for setting model-id for CPU models that changed 
model-id + * depending on QEMU versions up to QEMU 2.4. + */ +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index d95028018e..7b004065ae 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -131,6 +131,7 @@ static PropValue kvm_default_props[] = { + { "acpi", "off" }, + { "monitor", "off" }, + { "svm", "off" }, ++ { "kvm-pv-unhalt", "on" }, + { NULL, NULL }, + }; + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 5a698bde19..a668f521ac 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -3336,6 +3336,7 @@ static int kvm_get_msrs(X86CPU *cpu) + struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; + int ret, i; + uint64_t mtrr_top_bits; ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + + kvm_msr_buf_reset(cpu); + +@@ -3665,6 +3666,9 @@ static int kvm_get_msrs(X86CPU *cpu) + break; + case MSR_KVM_ASYNC_PF_EN: + env->async_pf_en_msr = msrs[i].data; ++ if (mc->async_pf_vmexit_disable) { ++ env->async_pf_en_msr &= ~(1ULL << 2); ++ } + break; + case MSR_KVM_ASYNC_PF_INT: + env->async_pf_int_msr = msrs[i].data; +diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c +index 6dcad2db49..580c2c43d2 100644 +--- a/tests/qtest/pvpanic-test.c ++++ b/tests/qtest/pvpanic-test.c +@@ -17,7 +17,7 @@ static void test_panic_nopause(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic -action panic=none"); ++ qts = qtest_init("-M q35 -device pvpanic -action panic=none"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); +@@ -40,7 +40,8 @@ static void test_panic(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic -action panic=pause"); ++ /* RHEL: Use q35 */ ++ qts = qtest_init("-M q35 -device pvpanic -action panic=pause"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); +-- +2.27.0 + diff --git a/0011-Enable-make-check.patch b/0011-Enable-make-check.patch new file mode 
100644 index 0000000..832b38d --- /dev/null +++ b/0011-Enable-make-check.patch @@ -0,0 +1,186 @@ +From 5e419e5e0a721bdbbfa6d9b82c8be5c5b3d26a01 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 2 Sep 2020 09:39:41 +0200 +Subject: Enable make check + +Fixing tests after device disabling and machine types changes and enabling +make check run during build. + +Signed-off-by: Miroslav Rezanina +--- +Rebase changes (6.1.0): +- removed unnecessary test changes + +Rebase changes (6.2.0): +- new way of disabling bios-table-test + +Rebase changes (7.0.0): +- Disable testing virtio-iommu-pci +- Rename default_bus_bypass_iommu property to default-bus-bypass-iommu +- Disable qtest-bios-table for aarch64 +- Removed redhat chunks for boot-serial-test.c, cdrom-test.c and cpu-plug-test.c qtests +- Do not disable boot-order-test, prom-env-test and boot-serial-test qtests +- Use rhel machine type for new intel hda qtest +- Remove unnecessary changes in iotest 051 +- Remove changes in bios-tables-test.c and prom-env-test.c qtests + +Merged patches (6.1.0): +- 2f129df7d3 redhat: Enable the 'test-block-iothread' test again +--- + .distro/qemu-kvm.spec.template | 5 ++--- + tests/qtest/fuzz-e1000e-test.c | 2 +- + tests/qtest/fuzz-virtio-scsi-test.c | 2 +- + tests/qtest/intel-hda-test.c | 2 +- + tests/qtest/libqos/meson.build | 2 +- + tests/qtest/lpc-ich9-test.c | 2 +- + tests/qtest/meson.build | 4 ---- + tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + tests/qtest/virtio-net-failover.c | 1 + + 9 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c +index 66229e6096..947fba73b7 100644 +--- a/tests/qtest/fuzz-e1000e-test.c ++++ b/tests/qtest/fuzz-e1000e-test.c +@@ -17,7 +17,7 @@ static void test_lp1879531_eth_get_rss_ex_dst_addr(void) + { + QTestState *s; + +- s = qtest_init("-nographic -monitor none -serial none -M pc-q35-5.0"); ++ s = qtest_init("-nographic -monitor none -serial none -M pc-q35-rhel8.4.0"); + 
+ qtest_outl(s, 0xcf8, 0x80001010); + qtest_outl(s, 0xcfc, 0xe1020000); +diff --git a/tests/qtest/fuzz-virtio-scsi-test.c b/tests/qtest/fuzz-virtio-scsi-test.c +index aaf6d10e18..43727d62ac 100644 +--- a/tests/qtest/fuzz-virtio-scsi-test.c ++++ b/tests/qtest/fuzz-virtio-scsi-test.c +@@ -19,7 +19,7 @@ static void test_mmio_oob_from_memory_region_cache(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.2 -display none -m 512M " ++ s = qtest_init("-M pc-q35-rhel8.4.0 -display none -m 512M " + "-device virtio-scsi,num_queues=8,addr=03.0 "); + + qtest_outl(s, 0xcf8, 0x80001811); +diff --git a/tests/qtest/intel-hda-test.c b/tests/qtest/intel-hda-test.c +index a58c98e4d1..c8387e39ce 100644 +--- a/tests/qtest/intel-hda-test.c ++++ b/tests/qtest/intel-hda-test.c +@@ -38,7 +38,7 @@ static void test_issue542_ich6(void) + { + QTestState *s; + +- s = qtest_init("-nographic -nodefaults -M pc-q35-6.2 " ++ s = qtest_init("-nographic -nodefaults -M pc-q35-rhel9.0.0 " + "-device intel-hda,id=" HDA_ID CODEC_DEVICES); + + qtest_outl(s, 0xcf8, 0x80000804); +diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build +index e988d15791..46f7dcb81a 100644 +--- a/tests/qtest/libqos/meson.build ++++ b/tests/qtest/libqos/meson.build +@@ -41,7 +41,7 @@ libqos_srcs = files('../libqtest.c', + 'virtio-rng.c', + 'virtio-scsi.c', + 'virtio-serial.c', +- 'virtio-iommu.c', ++# 'virtio-iommu.c', + + # qgraph machines: + 'aarch64-xlnx-zcu102-machine.c', +diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c +index fe0bef9980..7a9d51579b 100644 +--- a/tests/qtest/lpc-ich9-test.c ++++ b/tests/qtest/lpc-ich9-test.c +@@ -15,7 +15,7 @@ static void test_lp1878642_pci_bus_get_irq_level_assert(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.0 " ++ s = qtest_init("-M pc-q35-rhel8.4.0 " + "-nographic -monitor none -serial none"); + + qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ +diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build +index 
d25f82bb5a..67cd32def1 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -73,7 +73,6 @@ qtests_i386 = \ + config_all_devices.has_key('CONFIG_Q35') and \ + config_all_devices.has_key('CONFIG_VIRTIO_PCI') and \ + slirp.found() ? ['virtio-net-failover'] : []) + \ +- (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ + qtests_pci + \ + ['fdc-test', + 'ide-test', +@@ -86,7 +85,6 @@ qtests_i386 = \ + 'drive_del-test', + 'tco-test', + 'cpu-plug-test', +- 'q35-test', + 'vmgenid-test', + 'migration-test', + 'test-x86-cpuid-compat', +@@ -216,7 +214,6 @@ qtests_arm = \ + + # TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional + qtests_aarch64 = \ +- (cpu != 'arm' and unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ + (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-test'] : []) + \ + (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) + \ + (config_all_devices.has_key('CONFIG_XLNX_ZYNQMP_ARM') ? ['xlnx-can-test', 'fuzz-xlnx-dp-test'] : []) + \ +@@ -231,7 +228,6 @@ qtests_s390x = \ + (config_host.has_key('CONFIG_POSIX') ? 
['test-filter-redirector'] : []) + \ + ['boot-serial-test', + 'drive_del-test', +- 'device-plug-test', + 'virtio-ccw-test', + 'cpu-plug-test', + 'migration-test'] +diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c +index 10ef9d2a91..3855873050 100644 +--- a/tests/qtest/usb-hcd-xhci-test.c ++++ b/tests/qtest/usb-hcd-xhci-test.c +@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) + usb_test_hotplug(global_qtest, "xhci", "1", NULL); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_usb_uas_hotplug(void) + { + QTestState *qts = global_qtest; +@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void) + qtest_qmp_device_del(qts, "scsihd"); + qtest_qmp_device_del(qts, "uas"); + } ++#endif + + static void test_usb_ccid_hotplug(void) + { +@@ -56,7 +58,9 @@ int main(int argc, char **argv) + + qtest_add_func("/xhci/pci/init", test_xhci_init); + qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); ++#endif + qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); + + qtest_start("-device nec-usb-xhci,id=xhci" +diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c +index 78811f1c92..44de8af00c 100644 +--- a/tests/qtest/virtio-net-failover.c ++++ b/tests/qtest/virtio-net-failover.c +@@ -25,6 +25,7 @@ + #define PCI_SEL_BASE 0x0010 + + #define BASE_MACHINE "-M q35 -nodefaults " \ ++ "-global ICH9-LPC.acpi-pci-hotplug-with-bridge-support=on " \ + "-device pcie-root-port,id=root0,addr=0x1,bus=pcie.0,chassis=1 " \ + "-device pcie-root-port,id=root1,addr=0x2,bus=pcie.0,chassis=2 " + +-- +2.31.1 + diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch new file mode 100644 index 0000000..b2ff35a --- /dev/null +++ b/0012-Enable-make-check.patch @@ -0,0 +1,407 @@ +From 740a2dd943a2e0fcd41a9cd8eb94a136f8f49fa2 Mon Sep 17 00:00:00 2001 +From: Miroslav 
Rezanina +Date: Wed, 2 Sep 2020 09:39:41 +0200 +Subject: Enable make check + +Fixing tests after device disabling and machine types changes and enabling +make check run during build. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- Remove testing for pseries-2.7 in endianness test +- Disable device-plug-test on s390x as it uses a disabled device +- Do not run cpu-plug-tests on 7.3 and older machine types + +Rebase changes (4.1.0-rc0): +- removed iotests 068 + +Rebase changes (4.1.0-rc1): +- remove all 205 tests (unstable) + +Rebase changes (4.2.0-rc0): +- partially disable hd-geo-test (requires lsi53c895a) + +Rebase changes (5.1.0-rc1): +- Disable qtest/q35-test (uses upstream machine types) +- Do not run iotests on make check +- Enabled iotests 071 and 099 + +Rebase changes (5.2.0 rc0): +- Disable cdrom tests (unsupported devices) on x86_64 +- disable fuzz test + +Rebase changes (6.0.0): +- Disabled xlnx-can-test +- Disable pxb-pcie subtest for bios-table-test +- Replace qtest usage of upstream q35 machine type with pc-q35-rhel8.4.0 +- Do not run cdrom-test on aarch64 + +Rebase changes (6.1.0): +- Remove unnecessary test disabling changes + +Rebase changes (weekly-211006): +- New handling for bios-table-test (disabled downstream) + +Merged patches (4.0.0): +- f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce + +Merged patches (4.1.0-rc0): +- 41288ff redhat: Remove raw iotest 205 +--- + redhat/qemu-kvm.spec.template | 2 +- + tests/qemu-iotests/051 | 8 ++++---- + tests/qtest/bios-tables-test.c | 5 ++++- + tests/qtest/boot-serial-test.c | 6 +++++- + tests/qtest/cdrom-test.c | 4 ++++ + tests/qtest/cpu-plug-test.c | 4 ++-- + tests/qtest/fuzz-e1000e-test.c | 2 +- + tests/qtest/fuzz-virtio-scsi-test.c | 2 +- + tests/qtest/hd-geo-test.c | 4 ++++ + tests/qtest/lpc-ich9-test.c | 2 +- + tests/qtest/meson.build | 13 ++++--------- + tests/qtest/prom-env-test.c | 4 ++++ + tests/qtest/test-x86-cpuid-compat.c | 2 
++ + tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + 14 files changed, 41 insertions(+), 21 deletions(-) + +diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 +index 1d2fa93a11..c8a2815f54 100755 +--- a/tests/qemu-iotests/051 ++++ b/tests/qemu-iotests/051 +@@ -174,9 +174,9 @@ run_qemu -drive if=virtio + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive if=none,id=disk -device ide-cd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive if=none,id=disk -device ide-hd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +@@ -225,9 +225,9 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-hd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index 258874167e..16d8304cde 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -1372,6 +1372,7 @@ static void test_acpi_virt_tcg_numamem(void) + + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_acpi_virt_tcg_pxb(void) + { + test_data data = { +@@ -1403,6 +1404,7 @@ static void 
test_acpi_virt_tcg_pxb(void) + + free_test_data(&data); + } ++#endif + + static void test_acpi_tcg_acpi_hmat(const char *machine) + { +@@ -1644,7 +1646,8 @@ int main(int argc, char *argv[]) + qtest_add_func("acpi/virt", test_acpi_virt_tcg); + qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem); + qtest_add_func("acpi/virt/memhp", test_acpi_virt_tcg_memhp); +- qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); ++ /* Disabled for Red Hat Enterprise Linux ++ qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); */ + qtest_add_func("acpi/virt/oem-fields", test_acpi_oem_fields_virt); + } + } +diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c +index 83828ba270..294476b959 100644 +--- a/tests/qtest/boot-serial-test.c ++++ b/tests/qtest/boot-serial-test.c +@@ -148,19 +148,23 @@ static testdef_t tests[] = { + { "ppc", "g3beige", "", "PowerPC,750" }, + { "ppc", "mac99", "", "PowerPC,G4" }, + { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "ppce500", "", "U-Boot" }, + { "ppc64", "40p", "-m 192", "Memory: 192M" }, + { "ppc64", "mac99", "", "PowerPC,970FX" }, ++#endif + { "ppc64", "pseries", + "-machine " PSERIES_DEFAULT_CAPABILITIES, + "Open Firmware" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "powernv8", "", "OPAL" }, + { "ppc64", "powernv9", "", "OPAL" }, + { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, ++#endif + { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "i386", "pc", "-device sga", "SGABIOS" }, + { "i386", "q35", "-device sga", "SGABIOS" }, +- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, ++ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "x86_64", "q35", "-device sga", "SGABIOS" }, + { "sparc", "LX", "", "TMS390S10" }, + { "sparc", "SS-4", "", "MB86904" }, +diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c +index 5af944a5fb..69d9bac38a 100644 +--- 
a/tests/qtest/cdrom-test.c ++++ b/tests/qtest/cdrom-test.c +@@ -140,6 +140,7 @@ static void add_x86_tests(void) + qtest_add_data_func("cdrom/boot/isapc", "-M isapc " + "-drive if=ide,media=cdrom,file=", test_cdboot); + } ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_data_func("cdrom/boot/am53c974", + "-device am53c974 -device scsi-cd,drive=cd1 " + "-drive if=none,id=cd1,format=raw,file=", test_cdboot); +@@ -155,6 +156,7 @@ static void add_x86_tests(void) + qtest_add_data_func("cdrom/boot/megasas-gen2", "-M q35 " + "-device megasas-gen2 -device scsi-cd,drive=cd1 " + "-blockdev file,node-name=cd1,filename=", test_cdboot); ++#endif + } + + static void add_s390x_tests(void) +@@ -220,6 +222,7 @@ int main(int argc, char **argv) + "magnum", "malta", "pica61", NULL + }; + add_cdrom_param_tests(mips64machines); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + } else if (g_str_equal(arch, "arm") || g_str_equal(arch, "aarch64")) { + const char *armmachines[] = { + "realview-eb", "realview-eb-mpcore", "realview-pb-a8", +@@ -227,6 +230,7 @@ int main(int argc, char **argv) + "vexpress-a9", "virt", NULL + }; + add_cdrom_param_tests(armmachines); ++#endif + } else { + const char *nonemachine[] = { "none", NULL }; + add_cdrom_param_tests(nonemachine); +diff --git a/tests/qtest/cpu-plug-test.c b/tests/qtest/cpu-plug-test.c +index a1c689414b..a8f076711c 100644 +--- a/tests/qtest/cpu-plug-test.c ++++ b/tests/qtest/cpu-plug-test.c +@@ -110,8 +110,8 @@ static void add_pseries_test_case(const char *mname) + char *path; + PlugTestData *data; + +- if (!g_str_has_prefix(mname, "pseries-") || +- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { ++ if (!g_str_has_prefix(mname, "pseries-rhel") || ++ (g_str_has_prefix(mname, "pseries-rhel7.") && atoi(&mname[14]) < 4)) { + return; + } + data = g_new(PlugTestData, 1); +diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c +index 66229e6096..947fba73b7 100644 +--- 
a/tests/qtest/fuzz-e1000e-test.c ++++ b/tests/qtest/fuzz-e1000e-test.c +@@ -17,7 +17,7 @@ static void test_lp1879531_eth_get_rss_ex_dst_addr(void) + { + QTestState *s; + +- s = qtest_init("-nographic -monitor none -serial none -M pc-q35-5.0"); ++ s = qtest_init("-nographic -monitor none -serial none -M pc-q35-rhel8.4.0"); + + qtest_outl(s, 0xcf8, 0x80001010); + qtest_outl(s, 0xcfc, 0xe1020000); +diff --git a/tests/qtest/fuzz-virtio-scsi-test.c b/tests/qtest/fuzz-virtio-scsi-test.c +index aaf6d10e18..43727d62ac 100644 +--- a/tests/qtest/fuzz-virtio-scsi-test.c ++++ b/tests/qtest/fuzz-virtio-scsi-test.c +@@ -19,7 +19,7 @@ static void test_mmio_oob_from_memory_region_cache(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.2 -display none -m 512M " ++ s = qtest_init("-M pc-q35-rhel8.4.0 -display none -m 512M " + "-device virtio-scsi,num_queues=8,addr=03.0 "); + + qtest_outl(s, 0xcf8, 0x80001811); +diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c +index 113126ae06..999ef2aace 100644 +--- a/tests/qtest/hd-geo-test.c ++++ b/tests/qtest/hd-geo-test.c +@@ -737,6 +737,7 @@ static void test_override_ide(void) + test_override(args, expected); + } + ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + static void test_override_scsi(void) + { + TestArgs *args = create_args(); +@@ -781,6 +782,7 @@ static void test_override_scsi_2_controllers(void) + add_scsi_disk(args, 3, 1, 0, 1, 2, 0, 1, 0); + test_override(args, expected); + } ++#endif + + static void test_override_virtio_blk(void) + { +@@ -960,9 +962,11 @@ int main(int argc, char **argv) + qtest_add_func("hd-geo/ide/device/user/chst", test_ide_device_user_chst); + if (have_qemu_img()) { + qtest_add_func("hd-geo/override/ide", test_override_ide); ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + qtest_add_func("hd-geo/override/scsi", test_override_scsi); + qtest_add_func("hd-geo/override/scsi_2_controllers", + test_override_scsi_2_controllers); ++#endif + 
qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); + qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); + qtest_add_func("hd-geo/override/scsi_hot_unplug", +diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c +index fe0bef9980..7a9d51579b 100644 +--- a/tests/qtest/lpc-ich9-test.c ++++ b/tests/qtest/lpc-ich9-test.c +@@ -15,7 +15,7 @@ static void test_lp1878642_pci_bus_get_irq_level_assert(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.0 " ++ s = qtest_init("-M pc-q35-rhel8.4.0 " + "-nographic -monitor none -serial none"); + + qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ +diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build +index c9d8458062..049e06c057 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -68,7 +68,6 @@ qtests_i386 = \ + (config_all_devices.has_key('CONFIG_RTL8139_PCI') ? ['rtl8139-test'] : []) + \ + (config_all_devices.has_key('CONFIG_E1000E_PCI_EXPRESS') ? ['fuzz-e1000e-test'] : []) + \ + (config_all_devices.has_key('CONFIG_ESP_PCI') ? ['am53c974-test'] : []) + \ +- (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ + qtests_pci + \ + ['fdc-test', + 'ide-test', +@@ -81,7 +80,6 @@ qtests_i386 = \ + 'drive_del-test', + 'tco-test', + 'cpu-plug-test', +- 'q35-test', + 'vmgenid-test', + 'migration-test', + 'test-x86-cpuid-compat', +@@ -130,17 +128,15 @@ qtests_mips64el = \ + + qtests_ppc = \ + (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) + \ +- (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) + \ +- ['boot-order-test', 'prom-env-test', 'boot-serial-test'] \ ++ (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) + + qtests_ppc64 = \ + (config_all_devices.has_key('CONFIG_PSERIES') ? ['device-plug-test'] : []) + \ + (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-xscom-test'] : []) + \ + (config_all_devices.has_key('CONFIG_PSERIES') ? ['rtas-test'] : []) + \ +- (slirp.found() ? 
['pxe-test', 'test-netfilter'] : []) + \ ++ (slirp.found() ? ['pxe-test'] : []) + \ + (config_all_devices.has_key('CONFIG_USB_UHCI') ? ['usb-hcd-uhci-test'] : []) + \ + (config_all_devices.has_key('CONFIG_USB_XHCI_NEC') ? ['usb-hcd-xhci-test'] : []) + \ +- (config_host.has_key('CONFIG_POSIX') ? ['test-filter-mirror'] : []) + \ + qtests_pci + ['migration-test', 'numa-test', 'cpu-plug-test', 'drive_del-test'] + + qtests_sh4 = (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) +@@ -186,8 +182,8 @@ qtests_aarch64 = \ + ['arm-cpu-features', + 'numa-test', + 'boot-serial-test', +- 'xlnx-can-test', +- 'fuzz-xlnx-dp-test', ++# 'xlnx-can-test', ++# 'fuzz-xlnx-dp-test', + 'migration-test'] + + qtests_s390x = \ +@@ -196,7 +192,6 @@ qtests_s390x = \ + (config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \ + ['boot-serial-test', + 'drive_del-test', +- 'device-plug-test', + 'virtio-ccw-test', + 'cpu-plug-test', + 'migration-test'] +diff --git a/tests/qtest/prom-env-test.c b/tests/qtest/prom-env-test.c +index f41d80154a..f8dc478ce8 100644 +--- a/tests/qtest/prom-env-test.c ++++ b/tests/qtest/prom-env-test.c +@@ -89,10 +89,14 @@ int main(int argc, char *argv[]) + if (!strcmp(arch, "ppc")) { + add_tests(ppc_machines); + } else if (!strcmp(arch, "ppc64")) { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + add_tests(ppc_machines); + if (g_test_slow()) { ++#endif + qtest_add_data_func("prom-env/pseries", "pseries", test_machine); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + } ++#endif + } else if (!strcmp(arch, "sparc")) { + add_tests(sparc_machines); + } else if (!strcmp(arch, "sparc64")) { +diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c +index f28848e06e..6b2fd398a2 100644 +--- a/tests/qtest/test-x86-cpuid-compat.c ++++ b/tests/qtest/test-x86-cpuid-compat.c +@@ -300,6 +300,7 @@ int main(int argc, char **argv) + "-cpu 486,xlevel2=0xC0000002,xstore=on", + "xlevel2", 0xC0000002); + 
++#if 0 /* Disabled in Red Hat Enterprise Linux */ + /* Check compatibility of old machine-types that didn't + * auto-increase level/xlevel/xlevel2: */ + +@@ -350,6 +351,7 @@ int main(int argc, char **argv) + add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", + "-machine pc-i440fx-2.4 -cpu SandyBridge,svm=on,npt=on", + "xlevel", 0x80000008); ++#endif + + /* Test feature parsing */ + add_feature_test("x86/cpuid/features/plus", +diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c +index 10ef9d2a91..3855873050 100644 +--- a/tests/qtest/usb-hcd-xhci-test.c ++++ b/tests/qtest/usb-hcd-xhci-test.c +@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) + usb_test_hotplug(global_qtest, "xhci", "1", NULL); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_usb_uas_hotplug(void) + { + QTestState *qts = global_qtest; +@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void) + qtest_qmp_device_del(qts, "scsihd"); + qtest_qmp_device_del(qts, "uas"); + } ++#endif + + static void test_usb_ccid_hotplug(void) + { +@@ -56,7 +58,9 @@ int main(int argc, char **argv) + + qtest_add_func("/xhci/pci/init", test_xhci_init); + qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); ++#endif + qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); + + qtest_start("-device nec-usb-xhci,id=xhci" +-- +2.27.0 + diff --git a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch new file mode 100644 index 0000000..c9e42b2 --- /dev/null +++ b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -0,0 +1,104 @@ +From c358fd4c224a9c3f64b4a8fff34cc6b1dc201fa0 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Tue, 3 Dec 2013 20:05:13 +0100 +Subject: vfio: cap number of devices that can be assigned + +RH-Author: Bandan Das +Message-id: 
<1386101113-31560-3-git-send-email-bsd@redhat.com> +Patchwork-id: 55984 +O-Subject: [PATCH RHEL7 qemu-kvm v2 2/2] vfio: cap number of devices that can be assigned +Bugzilla: 678368 +RH-Acked-by: Alex Williamson +RH-Acked-by: Marcelo Tosatti +RH-Acked-by: Michael S. Tsirkin + +Go through all groups to get count of total number of devices +active to enforce limit + +Reasoning from Alex for the limit(32) - Assuming 3 slots per +device, with 125 slots (number of memory slots for RHEL 7), +we can support almost 40 devices and still have few slots left +for other uses. Stepping down a bit, the number 32 arbitrarily +matches the number of slots on a PCI bus and is also a nice power +of two. + +Count of slots increased to 509 later so we could increase limit +to 64 as some usecases require more than 32 devices. + +Signed-off-by: Bandan Das +--- + hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- + hw/vfio/pci.h | 1 + + 2 files changed, 29 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 67a183f17b..1e20f9fd59 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -45,6 +45,9 @@ + + #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" + ++/* RHEL only: Set once for the first assigned dev */ ++static uint16_t device_limit; ++ + static void vfio_disable_interrupts(VFIOPCIDevice *vdev); + static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); + +@@ -2810,9 +2813,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ssize_t len; + struct stat st; + int groupid; +- int i, ret; ++ int ret, i = 0; + bool is_mdev; + ++ if (device_limit && device_limit != vdev->assigned_device_limit) { ++ error_setg(errp, "Assigned device limit has been redefined. 
" ++ "Old:%d, New:%d", ++ device_limit, vdev->assigned_device_limit); ++ return; ++ } else { ++ device_limit = vdev->assigned_device_limit; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ i++; ++ } ++ } ++ ++ if (i >= vdev->assigned_device_limit) { ++ error_setg(errp, "Maximum supported vfio devices (%d) " ++ "already attached", vdev->assigned_device_limit); ++ return; ++ } ++ + if (!vdev->vbasedev.sysfsdev) { + if (!(~vdev->host.domain || ~vdev->host.bus || + ~vdev->host.slot || ~vdev->host.function)) { +@@ -3249,6 +3273,9 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), + DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, + no_geforce_quirks, false), ++ /* RHEL only */ ++ DEFINE_PROP_UINT16("x-assigned-device-limit", VFIOPCIDevice, ++ assigned_device_limit, 64), + DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd, + false), + DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 64777516d1..e0fe6ca97e 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -139,6 +139,7 @@ struct VFIOPCIDevice { + EventNotifier err_notifier; + EventNotifier req_notifier; + int (*resetfn)(struct VFIOPCIDevice *); ++ uint16_t assigned_device_limit; + uint32_t vendor_id; + uint32_t device_id; + uint32_t sub_vendor_id; +-- +2.31.1 + diff --git a/0013-Add-support-statement-to-help-output.patch b/0013-Add-support-statement-to-help-output.patch new file mode 100644 index 0000000..4826ea4 --- /dev/null +++ b/0013-Add-support-statement-to-help-output.patch @@ -0,0 +1,55 @@ +From ba0c7a5f6b9a1f75666db6b3b795ddf03695dc26 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 4 Dec 2013 18:53:17 +0100 +Subject: Add support statement to -help output + +RH-Author: Eduardo Habkost +Message-id: <1386183197-27761-1-git-send-email-ehabkost@redhat.com> 
+Patchwork-id: 55994 +O-Subject: [qemu-kvm RHEL7 PATCH] Add support statement to -help output +Bugzilla: 972773 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: knoel@redhat.com +RH-Acked-by: Paolo Bonzini + +Add support statement to -help output, reporting direct qemu-kvm usage +as unsupported by Red Hat, and advising users to use libvirt instead. + +Signed-off-by: Eduardo Habkost +--- + softmmu/vl.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/softmmu/vl.c b/softmmu/vl.c +index 6f646531a0..9d5dab43d2 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -831,9 +831,17 @@ static void version(void) + QEMU_COPYRIGHT "\n"); + } + ++static void print_rh_warning(void) ++{ ++ printf("\nWARNING: Direct use of qemu-kvm from the command line is not supported by Red Hat.\n" ++ "WARNING: Use libvirt as the stable management interface.\n" ++ "WARNING: Some command line options listed here may not be available in future releases.\n\n"); ++} ++ + static void help(int exitcode) + { + version(); ++ print_rh_warning(); + printf("usage: %s [options] [disk_image]\n\n" + "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", + g_get_prgname()); +@@ -859,6 +867,7 @@ static void help(int exitcode) + "\n" + QEMU_HELP_BOTTOM "\n"); + ++ print_rh_warning(); + exit(exitcode); + } + +-- +2.31.1 + diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch new file mode 100644 index 0000000..d9c8d42 --- /dev/null +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -0,0 +1,110 @@ +From e9ebc159a9acf108e1ec6f622be3f256cf14aba7 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Tue, 3 Dec 2013 20:05:13 +0100 +Subject: vfio: cap number of devices that can be assigned + +RH-Author: Bandan Das +Message-id: <1386101113-31560-3-git-send-email-bsd@redhat.com> +Patchwork-id: 55984 +O-Subject: [PATCH RHEL7 qemu-kvm v2 2/2] vfio: cap number of devices that can be assigned +Bugzilla: 678368 
+RH-Acked-by: Alex Williamson +RH-Acked-by: Marcelo Tosatti +RH-Acked-by: Michael S. Tsirkin + +Go through all groups to get count of total number of devices +active to enforce limit + +Reasoning from Alex for the limit(32) - Assuming 3 slots per +device, with 125 slots (number of memory slots for RHEL 7), +we can support almost 40 devices and still have few slots left +for other uses. Stepping down a bit, the number 32 arbitrarily +matches the number of slots on a PCI bus and is also a nice power +of two. + +Signed-off-by: Bandan Das + +Rebase notes (2.8.0): +- removed return value for vfio_realize (commit 1a22aca) + +Merged patches (2.9.0): +- 17eb774 vfio: Use error_setg when reporting max assigned device overshoot + + Merged patches (4.1.0-rc3): +- 2b89558 vfio: increase the cap on number of assigned devices to 64 +--- + hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- + hw/vfio/pci.h | 1 + + 2 files changed, 29 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 7b45353ce2..eb725a3aee 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -45,6 +45,9 @@ + + #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" + ++/* RHEL only: Set once for the first assigned dev */ ++static uint16_t device_limit; ++ + static void vfio_disable_interrupts(VFIOPCIDevice *vdev); + static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); + +@@ -2807,9 +2810,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ssize_t len; + struct stat st; + int groupid; +- int i, ret; ++ int ret, i = 0; + bool is_mdev; + ++ if (device_limit && device_limit != vdev->assigned_device_limit) { ++ error_setg(errp, "Assigned device limit has been redefined. 
" ++ "Old:%d, New:%d", ++ device_limit, vdev->assigned_device_limit); ++ return; ++ } else { ++ device_limit = vdev->assigned_device_limit; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ i++; ++ } ++ } ++ ++ if (i >= vdev->assigned_device_limit) { ++ error_setg(errp, "Maximum supported vfio devices (%d) " ++ "already attached", vdev->assigned_device_limit); ++ return; ++ } ++ + if (!vdev->vbasedev.sysfsdev) { + if (!(~vdev->host.domain || ~vdev->host.bus || + ~vdev->host.slot || ~vdev->host.function)) { +@@ -3246,6 +3270,9 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), + DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, + no_geforce_quirks, false), ++ /* RHEL only */ ++ DEFINE_PROP_UINT16("x-assigned-device-limit", VFIOPCIDevice, ++ assigned_device_limit, 64), + DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd, + false), + DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 64777516d1..e0fe6ca97e 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -139,6 +139,7 @@ struct VFIOPCIDevice { + EventNotifier err_notifier; + EventNotifier req_notifier; + int (*resetfn)(struct VFIOPCIDevice *); ++ uint16_t assigned_device_limit; + uint32_t vendor_id; + uint32_t device_id; + uint32_t sub_vendor_id; +-- +2.27.0 + diff --git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch new file mode 100644 index 0000000..2259e13 --- /dev/null +++ b/0014-Add-support-statement-to-help-output.patch @@ -0,0 +1,55 @@ +From b736b0c41dd62ed6f874a7b33ca1d4f9ceab4573 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 4 Dec 2013 18:53:17 +0100 +Subject: Add support statement to -help output + +RH-Author: Eduardo Habkost +Message-id: <1386183197-27761-1-git-send-email-ehabkost@redhat.com> 
+Patchwork-id: 55994 +O-Subject: [qemu-kvm RHEL7 PATCH] Add support statement to -help output +Bugzilla: 972773 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: knoel@redhat.com +RH-Acked-by: Paolo Bonzini + +Add support statement to -help output, reporting direct qemu-kvm usage +as unsupported by Red Hat, and advising users to use libvirt instead. + +Signed-off-by: Eduardo Habkost +--- + softmmu/vl.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/softmmu/vl.c b/softmmu/vl.c +index 620a1f1367..d46b8fb4ab 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -827,9 +827,17 @@ static void version(void) + QEMU_COPYRIGHT "\n"); + } + ++static void print_rh_warning(void) ++{ ++ printf("\nWARNING: Direct use of qemu-kvm from the command line is not supported by Red Hat.\n" ++ "WARNING: Use libvirt as the stable management interface.\n" ++ "WARNING: Some command line options listed here may not be available in future releases.\n\n"); ++} ++ + static void help(int exitcode) + { + version(); ++ print_rh_warning(); + printf("usage: %s [options] [disk_image]\n\n" + "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", + error_get_progname()); +@@ -855,6 +863,7 @@ static void help(int exitcode) + "\n" + QEMU_HELP_BOTTOM "\n"); + ++ print_rh_warning(); + exit(exitcode); + } + +-- +2.27.0 + diff --git a/0014-globally-limit-the-maximum-number-of-CPUs.patch b/0014-globally-limit-the-maximum-number-of-CPUs.patch new file mode 100644 index 0000000..6764a84 --- /dev/null +++ b/0014-globally-limit-the-maximum-number-of-CPUs.patch @@ -0,0 +1,45 @@ +From 9ebfd2f6cfa8e79c92e58fd169f90cc768fb865a Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Tue, 21 Jan 2014 10:46:52 +0100 +Subject: globally limit the maximum number of CPUs + +We now globally limit the number of VCPUs. +Especially, there is no way one can specify more than +max_cpus VCPUs for a VM. 
+ +This allows us to restore the ppc max_cpus limitation to the upstream +default and minimize the ppc hack in kvm-all.c. + +Signed-off-by: David Hildenbrand +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo Cesar Lemes de Paula +--- + accel/kvm/kvm-all.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 5f1377ca04..fdf0e4d429 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2430,6 +2430,18 @@ static int kvm_init(MachineState *ms) + soft_vcpus_limit = kvm_recommended_vcpus(s); + hard_vcpus_limit = kvm_max_vcpus(s); + ++#ifdef HOST_PPC64 ++ /* ++ * On POWER, the kernel advertises a soft limit based on the ++ * number of CPU threads on the host. We want to allow exceeding ++ * this for testing purposes, so we don't want to set hard limit ++ * to soft limit as on x86. ++ */ ++#else ++ /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ ++ hard_vcpus_limit = soft_vcpus_limit; ++#endif ++ + while (nc->name) { + if (nc->num > soft_vcpus_limit) { + warn_report("Number of %s cpus requested (%d) exceeds " +-- +2.31.1 + diff --git a/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch new file mode 100644 index 0000000..722484d --- /dev/null +++ b/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -0,0 +1,61 @@ +From 4b6c8cdc52fdf94d4098d278defb3833dce1d189 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 8 Jul 2020 08:35:50 +0200 +Subject: Use qemu-kvm in documentation instead of qemu-system- + +Patchwork-id: 62380 +O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 +Bugzilla: 1140620 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi + +From: Miroslav Rezanina + +We change the name and location of qemu-kvm binaries. Update documentation +to reflect this change.
Only architectures available in RHEL are updated. + +Signed-off-by: Miroslav Rezanina +--- + docs/defs.rst.inc | 4 ++-- + qemu-options.hx | 10 +++++----- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc +index 52d6454b93..d74dbdeca9 100644 +--- a/docs/defs.rst.inc ++++ b/docs/defs.rst.inc +@@ -9,7 +9,7 @@ + but the manpages will end up misrendered with following normal text + incorrectly in boldface. + +-.. |qemu_system| replace:: qemu-system-x86_64 +-.. |qemu_system_x86| replace:: qemu-system-x86_64 ++.. |qemu_system| replace:: qemu-kvm ++.. |qemu_system_x86| replace:: qemu-kvm + .. |I2C| replace:: I\ :sup:`2`\ C + .. |I2S| replace:: I\ :sup:`2`\ S +diff --git a/qemu-options.hx b/qemu-options.hx +index 34e9b32a5c..924f61ab6d 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -3233,11 +3233,11 @@ SRST + + :: + +- qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ +- -numa node,memdev=mem \ +- -chardev socket,id=chr0,path=/path/to/socket \ +- -netdev type=vhost-user,id=net0,chardev=chr0 \ +- -device virtio-net-pci,netdev=net0 ++ qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 + + ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` + Establish a vhost-vdpa netdev. +-- +2.31.1 + diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch new file mode 100644 index 0000000..31d9643 --- /dev/null +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -0,0 +1,65 @@ +From 9a7621819821ee88d2f99d6b629fd87aa9a07758 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Tue, 21 Jan 2014 10:46:52 +0100 +Subject: globally limit the maximum number of CPUs + +We now globally limit the number of VCPUs. 
+Especially, there is no way one can specify more than +max_cpus VCPUs for a VM. + +This allows us to restore the ppc max_cpus limitation to the upstream +default and minimize the ppc hack in kvm-all.c. + +Signed-off-by: David Hildenbrand +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo Cesar Lemes de Paula + +Rebase notes (2.11.0): +- Removed CONFIG_RHV reference +- Update commit log + +Merged patches (2.11.0): +- 92fef14623 redhat: remove manual max_cpus limitations for ppc +- bb722e9eff redhat: globally limit the maximum number of CPUs +- fdeef3c1c7 RHEL: Set vcpus hard limit to 240 for Power +- 0584216921 Match POWER max cpus to x86 + +Signed-off-by: Andrew Jones + +Merged patches (5.1.0): +- redhat: globally limit the maximum number of CPUs +- redhat: remove manual max_cpus limitations for ppc +- use recommended max vcpu count + +Merged patches (5.2.0 rc0): +- f8a4123 vl: Remove downstream-only MAX_RHEL_CPUS code +--- + accel/kvm/kvm-all.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index eecd8031cf..8f2a53438f 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2423,6 +2423,18 @@ static int kvm_init(MachineState *ms) + soft_vcpus_limit = kvm_recommended_vcpus(s); + hard_vcpus_limit = kvm_max_vcpus(s); + ++#ifdef HOST_PPC64 ++ /* ++ * On POWER, the kernel advertises a soft limit based on the ++ * number of CPU threads on the host. We want to allow exceeding ++ * this for testing purposes, so we don't want to set hard limit ++ * to soft limit as on x86.
++ */ ++#else ++ /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ ++ hard_vcpus_limit = soft_vcpus_limit; ++#endif ++ + while (nc->name) { + if (nc->num > soft_vcpus_limit) { + warn_report("Number of %s cpus requested (%d) exceeds " +-- +2.27.0 + diff --git a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch new file mode 100644 index 0000000..9eda7c3 --- /dev/null +++ b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -0,0 +1,126 @@ +From 0d3fc0b4c5773c6cabb0a58c064475f76eb6ac1e Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 8 Jul 2020 08:35:50 +0200 +Subject: Use qemu-kvm in documentation instead of qemu-system- + +Patchwork-id: 62380 +O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 +Bugzilla: 1140620 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi + +From: Miroslav Rezanina + +We change the name and location of qemu-kvm binaries. Update documentation +to reflect this change. Only architectures available in RHEL are updated. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (5.1.0 rc0): + - qemu-block-drivers.texi converted to qemu-block-drivers.rst (upstream) + +Rebase notes (5.2.0 rc0): + - rewrite patch to new docs structure +--- + docs/defs.rst.inc | 4 ++-- + docs/tools/qemu-trace-stap.rst | 14 +++++++------- + qemu-options.hx | 10 +++++----- + 3 files changed, 14 insertions(+), 14 deletions(-) + +diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc +index 52d6454b93..d74dbdeca9 100644 +--- a/docs/defs.rst.inc ++++ b/docs/defs.rst.inc +@@ -9,7 +9,7 @@ + but the manpages will end up misrendered with following normal text + incorrectly in boldface. + +-.. |qemu_system| replace:: qemu-system-x86_64 +-.. |qemu_system_x86| replace:: qemu-system-x86_64 ++.. |qemu_system| replace:: qemu-kvm ++.. |qemu_system_x86| replace:: qemu-kvm + .. 
|I2C| replace:: I\ :sup:`2`\ C + .. |I2S| replace:: I\ :sup:`2`\ S +diff --git a/docs/tools/qemu-trace-stap.rst b/docs/tools/qemu-trace-stap.rst +index d53073b52b..9e93df084f 100644 +--- a/docs/tools/qemu-trace-stap.rst ++++ b/docs/tools/qemu-trace-stap.rst +@@ -46,19 +46,19 @@ The following commands are valid: + any of the listed names. If no *PATTERN* is given, the all possible + probes will be listed. + +- For example, to list all probes available in the ``qemu-system-x86_64`` ++ For example, to list all probes available in the ``qemu-kvm`` + binary: + + :: + +- $ qemu-trace-stap list qemu-system-x86_64 ++ $ qemu-trace-stap list qemu-kvm + + To filter the list to only cover probes related to QEMU's cryptographic + subsystem, in a binary outside ``$PATH`` + + :: + +- $ qemu-trace-stap list /opt/qemu/4.0.0/bin/qemu-system-x86_64 'qcrypto*' ++ $ qemu-trace-stap list /opt/qemu/4.0.0/bin/qemu-kvm 'qcrypto*' + + .. option:: run OPTIONS BINARY PATTERN... + +@@ -90,18 +90,18 @@ The following commands are valid: + Restrict the tracing session so that it only triggers for the process + identified by *PID*. 
+ +- For example, to monitor all processes executing ``qemu-system-x86_64`` ++ For example, to monitor all processes executing ``qemu-kvm`` + as found on ``$PATH``, displaying all I/O related probes: + + :: + +- $ qemu-trace-stap run qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap run qemu-kvm 'qio*' + + To monitor only the QEMU process with PID 1732 + + :: + +- $ qemu-trace-stap run --pid=1732 qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap run --pid=1732 qemu-kvm 'qio*' + + To monitor QEMU processes running an alternative binary outside of + ``$PATH``, displaying verbose information about setup of the +@@ -109,7 +109,7 @@ The following commands are valid: + + :: + +- $ qemu-trace-stap -v run /opt/qemu/4.0.0/qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap -v run /opt/qemu/4.0.0/qemu-kvm 'qio*' + + See also + -------- +diff --git a/qemu-options.hx b/qemu-options.hx +index ae2c6dbbfc..94c4a8dbaf 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -3150,11 +3150,11 @@ SRST + + :: + +- qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ +- -numa node,memdev=mem \ +- -chardev socket,id=chr0,path=/path/to/socket \ +- -netdev type=vhost-user,id=net0,chardev=chr0 \ +- -device virtio-net-pci,netdev=net0 ++ qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 + + ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` + Establish a vhost-vdpa netdev. 
+-- +2.27.0 + diff --git a/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch new file mode 100644 index 0000000..9f08024 --- /dev/null +++ b/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -0,0 +1,66 @@ +From b72e04cb7e417d9e1c973223747ab3a27abda8b4 Mon Sep 17 00:00:00 2001 +From: Fam Zheng +Date: Wed, 14 Jun 2017 15:37:01 +0200 +Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] + +RH-Author: Fam Zheng +Message-id: <20170614153701.14757-1-famz@redhat.com> +Patchwork-id: 75613 +O-Subject: [RHV-7.4 qemu-kvm-rhev PATCH v3] virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] +Bugzilla: 1378816 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +We need a fix for RHEL 7.4 and 7.3.z, but unfortunately upstream isn't +ready. If it were, the changes will be too invasive. To have an idea: + +https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05400.html + +is an incomplete attempt to fix part of the issue, and the remaining +work unfortunately involve even more complex changes. + +As a band-aid, this partially reverts the effect of ef8875b +(virtio-scsi: Remove op blocker for dataplane, since v2.7). We cannot +simply revert that commit as a whole because we already shipped it in +qemu-kvm-rhev 7.3, since when, block jobs has been possible. We should +only block what has been broken. Also, faithfully reverting the above +commit means adding back the removed op blocker, but that is not enough, +because it still crashes when inserting media into an initially empty +scsi-cd. + +All in all, scsi-cd on virtio-scsi-dataplane has basically been unusable +unless the scsi-cd never enters an empty state, so, disable it +altogether. Otherwise it would be much more difficult to avoid +crashing. + +Signed-off-by: Fam Zheng +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/scsi/virtio-scsi.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 34a968ecfb..7f6da33a8a 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -896,6 +896,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + AioContext *old_context; + int ret; + ++ /* XXX: Remove this check once block backend is capable of handling ++ * AioContext change upon eject/insert. ++ * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if ++ * data plane is not used, both cases are safe for scsi-cd. */ ++ if (s->ctx && s->ctx != qemu_get_aio_context() && ++ object_dynamic_cast(OBJECT(dev), "scsi-cd")) { ++ error_setg(errp, "scsi-cd is not supported by data plane"); ++ return; ++ } + if (s->ctx && !s->dataplane_fenced) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; +-- +2.31.1 + diff --git a/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch new file mode 100644 index 0000000..2bc687c --- /dev/null +++ b/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -0,0 +1,60 @@ +From 64a06662cdea0ff62efb122be4eab506b2a842d9 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Wed, 6 Feb 2019 03:58:56 +0000 +Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts + +RH-Author: David Gibson +Message-id: <20190206035856.19058-1-dgibson@redhat.com> +Patchwork-id: 84246 +O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts +Bugzilla: 1653590 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Serhii Popovych +RH-Acked-by: Thomas Huth + +Most current POWER guests require 64kiB page support, so that's the default +for the cap-hpt-max-pagesize option in qemu which limits available guest +page sizes. 
We warn if the value is set smaller than that, but don't +outright fail upstream, because we need to allow for the possibility of +guest (and/or host) kernels configured for 4kiB page sizes. + +Downstream, however, we simply don't support 4kiB pagesize configured +kernels in guest or host, so we can have qemu simply error out in this +situation. + +Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified + it failed immediately with a qemu error + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr_caps.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c +index 655ab856a0..6aa7f93df9 100644 +--- a/hw/ppc/spapr_caps.c ++++ b/hw/ppc/spapr_caps.c +@@ -329,12 +329,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) + { ++#if 0 /* disabled for RHEL */ + if (val < 12) { + error_setg(errp, "Require at least 4kiB hpt-max-page-size"); + return; + } else if (val < 16) { + warn_report("Many guests require at least 64kiB hpt-max-page-size"); + } ++#else /* Only page sizes >=64kiB supported for RHEL */ ++ if (val < 16) { ++ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); ++ return; ++ } ++#endif + + spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); + } +-- +2.31.1 + diff --git a/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch new file mode 100644 index 0000000..6b60efc --- /dev/null +++ b/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -0,0 +1,66 @@ +From d95768c039a2bf6b68422f83a8d55dad41bd3181 Mon Sep 17 00:00:00 2001 +From: Fam Zheng +Date: Wed, 14 Jun 2017 15:37:01 +0200 +Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] + +RH-Author: Fam Zheng +Message-id: <20170614153701.14757-1-famz@redhat.com> 
+Patchwork-id: 75613 +O-Subject: [RHV-7.4 qemu-kvm-rhev PATCH v3] virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] +Bugzilla: 1378816 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +We need a fix for RHEL 7.4 and 7.3.z, but unfortunately upstream isn't +ready. If it were, the changes will be too invasive. To have an idea: + +https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05400.html + +is an incomplete attempt to fix part of the issue, and the remaining +work unfortunately involve even more complex changes. + +As a band-aid, this partially reverts the effect of ef8875b +(virtio-scsi: Remove op blocker for dataplane, since v2.7). We cannot +simply revert that commit as a whole because we already shipped it in +qemu-kvm-rhev 7.3, since when, block jobs has been possible. We should +only block what has been broken. Also, faithfully reverting the above +commit means adding back the removed op blocker, but that is not enough, +because it still crashes when inserting media into an initially empty +scsi-cd. + +All in all, scsi-cd on virtio-scsi-dataplane has basically been unusable +unless the scsi-cd never enters an empty state, so, disable it +altogether. Otherwise it would be much more difficult to avoid +crashing. + +Signed-off-by: Fam Zheng +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula +--- + hw/scsi/virtio-scsi.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 51fd09522a..a35257c35a 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -896,6 +896,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + AioContext *old_context; + int ret; + ++ /* XXX: Remove this check once block backend is capable of handling ++ * AioContext change upon eject/insert. 
++ * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if ++ * data plane is not used, both cases are safe for scsi-cd. */ ++ if (s->ctx && s->ctx != qemu_get_aio_context() && ++ object_dynamic_cast(OBJECT(dev), "scsi-cd")) { ++ error_setg(errp, "scsi-cd is not supported by data plane"); ++ return; ++ } + if (s->ctx && !s->dataplane_fenced) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; +-- +2.27.0 + diff --git a/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch new file mode 100644 index 0000000..e07746d --- /dev/null +++ b/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -0,0 +1,60 @@ +From 92bb62c47eab021f8dabecd09b5fbc1706e6a29c Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Wed, 6 Feb 2019 03:58:56 +0000 +Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts + +RH-Author: David Gibson +Message-id: <20190206035856.19058-1-dgibson@redhat.com> +Patchwork-id: 84246 +O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts +Bugzilla: 1653590 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Serhii Popovych +RH-Acked-by: Thomas Huth + +Most current POWER guests require 64kiB page support, so that's the default +for the cap-hpt-max-pagesize option in qemu which limits available guest +page sizes. We warn if the value is set smaller than that, but don't +outright fail upstream, because we need to allow for the possibility of +guest (and/or host) kernels configured for 4kiB page sizes. + +Downstream, however, we simply don't support 4kiB pagesize configured +kernels in guest or host, so we can have qemu simply error out in this +situation. + +Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified + it failed immediately with a qemu error + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr_caps.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c +index ed7c077a0d..48a8efe678 100644 +--- a/hw/ppc/spapr_caps.c ++++ b/hw/ppc/spapr_caps.c +@@ -332,12 +332,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) + { ++#if 0 /* disabled for RHEL */ + if (val < 12) { + error_setg(errp, "Require at least 4kiB hpt-max-page-size"); + return; + } else if (val < 16) { + warn_report("Many guests require at least 64kiB hpt-max-page-size"); + } ++#else /* Only page sizes >=64kiB supported for RHEL */ ++ if (val < 16) { ++ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); ++ return; ++ } ++#endif + + spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); + } +-- +2.27.0 + diff --git a/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch new file mode 100644 index 0000000..d7401d5 --- /dev/null +++ b/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -0,0 +1,77 @@ +From 54f9157a918e1404f2f17ce89a9c8b9088c1bc06 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 20 Aug 2021 18:25:12 +0200 +Subject: qcow2: Deprecation warning when opening v2 images rw +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 37: qcow2: Deprecation warning when opening v2 images rw +RH-Commit: [1/1] f450d0ae32d35063b28c72c4f2d2ebb9e6d8db3e (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 1951814 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Reitz +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé + +qcow2 v3 has been around for a long time (since QEMU 1.1/RHEL 7), so +there is no real reason any more to use v2. People still using it might +do so unintentionally.
Warn about it and suggest upgrading during the +RHEL 9 timeframe so that the code can possibly be disabled in RHEL 10. + +The warning is restricted to read-write mode and the system emulator. +The primary motivation for not having it in qemu-img is that 'qemu-img +amend' for upgrades would warn otherwise. It also avoids having to make +too many changes to the test suite. + +bdrv_uses_whitelist() is used as a proxy for deciding whether we are +running in a tool or the system emulator. This is not entirely clean, +but it's what is available and the same function qcow2_do_open() already +uses it this way for another warning. + +Signed-off-by: Kevin Wolf + +patch_name: kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +present_in_specfile: true +location_in_specfile: 116 +--- +Rebase notes (6.1.0): +- Replace bs->read_only with bdrv_is_read_only +--- + block/qcow2.c | 6 ++++++ + tests/qemu-iotests/common.filter | 1 + + 2 files changed, 7 insertions(+) + +diff --git a/block/qcow2.c b/block/qcow2.c +index b5c47931ef..a795e457ac 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -1337,6 +1337,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, + ret = -ENOTSUP; + goto fail; + } ++ if (header.version < 3 && !bdrv_is_read_only(bs) && bdrv_uses_whitelist()) { ++ warn_report_once("qcow2 v2 images are deprecated and may not be " ++ "supported in future versions. 
Please consider " ++ "upgrading the image with 'qemu-img amend " ++ "-o compat=v3'."); ++ } + + s->qcow_version = header.version; + +diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter +index cc9f1a5891..6a13757177 100644 +--- a/tests/qemu-iotests/common.filter ++++ b/tests/qemu-iotests/common.filter +@@ -83,6 +83,7 @@ _filter_qemu() + { + gsed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \ + -e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \ ++ -e "/qcow2 v2 images are deprecated/d" \ + -e $'s#\r##' # QEMU monitor uses \r\n line endings + } + +-- +2.31.1 + diff --git a/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch b/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch new file mode 100644 index 0000000..285cd6b --- /dev/null +++ b/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch @@ -0,0 +1,135 @@ +From 1d6439527aa6ccabb58208c94417778ccc19de39 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 9 Feb 2022 04:16:25 -0500 +Subject: WRB: Introduce RHEL 9.0.0 hw compat structure + +General compatibility structure for post RHEL 9.0.0 rebase. 
+ +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 9 +++++++++ + hw/i386/pc.c | 6 ++++++ + hw/i386/pc_piix.c | 4 ++++ + hw/i386/pc_q35.c | 4 ++++ + hw/s390x/s390-virtio-ccw.c | 2 ++ + include/hw/boards.h | 3 +++ + include/hw/i386/pc.h | 3 +++ + 7 files changed, 31 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 28989b6e7b..dffc3ef4ab 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -53,6 +53,15 @@ GlobalProperty hw_compat_rhel_8_6[] = { + }; + const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); + ++/* ++ * Mostly the same as hw_compat_6_2 ++ */ ++GlobalProperty hw_compat_rhel_9_0[] = { ++ /* hw_compat_rhel_9_0 from hw_compat_6_2 */ ++ { "PIIX4_PM", "x-not-migrate-acpi-index", "on"}, ++}; ++const size_t hw_compat_rhel_9_0_len = G_N_ELEMENTS(hw_compat_rhel_9_0); ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 263d882af6..0886cfe3fe 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -391,6 +391,12 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_0_compat[] = { ++ /* pc_rhel_9_0_compat from pc_compat_6_2 */ ++ { "virtio-mem", "unplugged-inaccessible", "off" }, ++}; ++const size_t pc_rhel_9_0_compat_len = G_N_ELEMENTS(pc_rhel_9_0_compat); ++ + GlobalProperty pc_rhel_8_5_compat[] = { + /* pc_rhel_8_5_compat from pc_compat_6_0 */ + { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 0cacc0d623..dc987fe93b 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1014,6 +1014,10 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_0, ++ hw_compat_rhel_9_0_len); ++ compat_props_add(m->compat_props, pc_rhel_9_0_compat, ++ 
pc_rhel_9_0_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_6, + hw_compat_rhel_8_6_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_5, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 157160e069..52c253c570 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -669,6 +669,10 @@ static void pc_q35_machine_rhel900_options(MachineClass *m) + m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.0.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_0, ++ hw_compat_rhel_9_0_len); ++ compat_props_add(m->compat_props, pc_rhel_9_0_compat, ++ pc_rhel_9_0_compat_len); + } + + DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 465a2a09d2..08e0f6a79b 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1118,12 +1118,14 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + DEFINE_CCW_MACHINE(2_4, "2.4", false); + #endif + ++ + static void ccw_machine_rhel900_instance_options(MachineState *machine) + { + } + + static void ccw_machine_rhel900_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); + } + DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index d1555665df..635e45dd71 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -451,6 +451,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_9_0[]; ++extern const size_t hw_compat_rhel_9_0_len; ++ + extern GlobalProperty hw_compat_rhel_8_6[]; + extern const size_t hw_compat_rhel_8_6_len; + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 419a6ec24b..a492c420b5 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h 
+@@ -292,6 +292,9 @@ extern const size_t pc_compat_1_4_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_9_0_compat[]; ++extern const size_t pc_rhel_9_0_compat_len; ++ + extern GlobalProperty pc_rhel_8_5_compat[]; + extern const size_t pc_rhel_8_5_compat_len; + +-- +2.31.1 + diff --git a/0019-compat-Update-hw_compat_rhel_8_5.patch b/0019-compat-Update-hw_compat_rhel_8_5.patch new file mode 100644 index 0000000..6d2b7c3 --- /dev/null +++ b/0019-compat-Update-hw_compat_rhel_8_5.patch @@ -0,0 +1,53 @@ +From a9b5da617c29f48199cbea08d6a1c083877dce10 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 15 Nov 2021 14:22:29 +0100 +Subject: compat: Update hw_compat_rhel_8_5 + +RH-Author: Laurent Vivier +RH-MergeRequest: 66: redhat: Update pseries-rhel8.5.0 machine type +RH-Commit: [1/2] 232f2ad2b29d250fbdb8fcea9d814704c575ba2b +RH-Bugzilla: 2022608 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz + +Add properties from hw_compat_6_1 as it already includes the ones from +hw_compat_6_0. Add a lately added property from 6.0 too. 
+ +Signed-off-by: Laurent Vivier +-- +Rebase notes (6.2.0 rc3): +- Included compat changes introduced in RC2 +--- + hw/core/machine.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 62febde5aa..736c765c30 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -38,7 +38,7 @@ + #include "hw/virtio/virtio-pci.h" + + /* +- * Mostly the same as hw_compat_6_0 ++ * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ + GlobalProperty hw_compat_rhel_8_5[] = { + /* hw_compat_rhel_8_5 from hw_compat_6_0 */ +@@ -51,6 +51,12 @@ GlobalProperty hw_compat_rhel_8_5[] = { + { "e1000", "init-vet", "off" }, + /* hw_compat_rhel_8_5 from hw_compat_6_0 */ + { "e1000e", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "vhost-user-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "nvme-ns", "shared", "off" }, + }; + const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); + +-- +2.27.0 + diff --git a/0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch b/0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch new file mode 100644 index 0000000..af8e9dd --- /dev/null +++ b/0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch @@ -0,0 +1,43 @@ +From 82358c35f04f026820b3907069a6c19cd95b654d Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 15 Nov 2021 14:25:33 +0100 +Subject: redhat: Update pseries-rhel8.5.0 machine type + +RH-Author: Laurent Vivier +RH-MergeRequest: 66: redhat: Update pseries-rhel8.5.0 machine type +RH-Commit: [2/2] 36f7ad1ea56baaaecb139875ad0a90a6470196be +RH-Bugzilla: 2022608 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz + +We don't introduce a new machine type for rhel8.6.0 but we need +to keep compatibility with rhel8.5.0 machine type.
+ +Signed-off-by: Laurent Vivier +--- + hw/ppc/spapr.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index cace86028d..2f27888d8a 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5177,10 +5177,14 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) + + static void spapr_machine_rhel850_class_options(MachineClass *mc) + { ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ + /* The default machine type must apply the RHEL specific defaults */ + spapr_machine_rhel_default_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, + hw_compat_rhel_8_5_len); ++ smc->pre_6_2_numa_affinity = true; ++ mc->smp_props.prefer_sockets = true; + } + + DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); +-- +2.27.0 + diff --git a/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch b/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch new file mode 100644 index 0000000..d3b91d0 --- /dev/null +++ b/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch @@ -0,0 +1,38 @@ +From c8ad21ca31892f8798cf82508c2b2c61bf3b9895 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 4 Apr 2022 12:15:50 +0200 +Subject: redhat: Update s390x machine type compatibility for rebase to QEMU + 7.0.0 + +RH-Author: Thomas Huth +RH-MergeRequest: 143: Update machine type compatibility for QEMU 7.0.0 update [s390x] +RH-Commit: [23/23] 0ecf97d7bdddc50565b5779c64744b353f715cbd +RH-Bugzilla: 2064782 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +No s390x-specific machine class property updates required this time, +only an update to the default qemu cpu model. 
+ +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 08e0f6a79b..4a491d4988 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1121,6 +1121,9 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); + + static void ccw_machine_rhel900_instance_options(MachineState *machine) + { ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; ++ ++ s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); + } + + static void ccw_machine_rhel900_class_options(MachineClass *mc) +-- +2.31.1 + diff --git a/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch b/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch new file mode 100644 index 0000000..f9535a8 --- /dev/null +++ b/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch @@ -0,0 +1,70 @@ +From 38b89dc24551258b630f09d1c654b6c72b265c79 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 14 Apr 2022 14:58:43 +0100 +Subject: pc: Move s3/s4 suspend disabling to compat + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 155: 7.0 machine type fixes (x86) +RH-Commit: [26/26] 7d666032d5f5dab1444ebba085f92f2de4e86699 +RH-Bugzilla: 2064771 + +Our downstream patches currently have tweaks in the C code to disable +s3/s4; Thomas pointed out we can just set the property. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/acpi/ich9.c | 4 ++-- + hw/acpi/piix4.c | 4 ++-- + hw/i386/pc.c | 6 ++++++ + 3 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index de1e401cdf..bd9bbade70 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -435,8 +435,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; + pm->acpi_memory_hotplug.is_enabled = true; + pm->cpu_hotplug_legacy = true; +- pm->disable_s3 = 1; +- pm->disable_s4 = 1; ++ pm->disable_s3 = 0; ++ pm->disable_s4 = 0; + pm->s4_val = 2; + pm->use_acpi_hotplug_bridge = true; + pm->keep_pci_slot_hpc = true; +diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c +index 28544e78c3..2fb2b43248 100644 +--- a/hw/acpi/piix4.c ++++ b/hw/acpi/piix4.c +@@ -653,8 +653,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + + static Property piix4_pm_properties[] = { + DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), + DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, + use_acpi_hotplug_bridge, true), +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 0886cfe3fe..f98f842f80 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -380,6 +380,12 @@ const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + * machine type. 
+ */ + GlobalProperty pc_rhel_compat[] = { ++ /* we don't support s3/s4 suspend */ ++ { "PIIX4_PM", "disable_s3", "1" }, ++ { "PIIX4_PM", "disable_s4", "1" }, ++ { "ICH9-LPC", "disable_s3", "1" }, ++ { "ICH9-LPC", "disable_s4", "1" }, ++ + { TYPE_X86_CPU, "host-phys-bits", "on" }, + { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, + { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, +-- +2.31.1 + diff --git a/0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch b/0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch new file mode 100644 index 0000000..3bcf4e0 --- /dev/null +++ b/0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch @@ -0,0 +1,51 @@ +From ce73e939b993cc6be170cdb5d3f2068270593f2b Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 16 Nov 2021 17:03:07 +0100 +Subject: redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU + 6.2.0 update + +RH-Author: Eric Auger +RH-MergeRequest: 75: redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU 6.2.0 update +RH-Commit: [21/21] f027d13654944e3d34e3356affe7af952eec2bed +RH-Bugzilla: 2022607 +RH-Acked-by: Gavin Shan +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck +RH-Acked-by: Laurent Vivier + +To keep compatibility with the 8.5-AV machine type we need to +turn a few new options on by default: +smp_props.prefer_sockets, no_cpu_topology, no_tcg_its + +TESTED: migrate from rhel-av-8.5.0 to rhel-8.6.0 and vice-versa +with upstream fix: 33a0c404fb hw/intc/arm_gicv3_its: Revert version +increments in vmstate_its + +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c77d26ab13..e8941afd01 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3225,8 +3225,13 @@ type_init(rhel_machine_init); + + static void rhel850_virt_options(MachineClass *mc) + { ++ VirtMachineClass *vmc =
VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ mc->smp_props.prefer_sockets = true; ++ vmc->no_cpu_topology = true; ++ vmc->no_tcg_its = true; + } + DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) + +-- +2.27.0 + diff --git a/0022-Fix-virtio-net-pci-vectors-compat.patch b/0022-Fix-virtio-net-pci-vectors-compat.patch new file mode 100644 index 0000000..b484ea1 --- /dev/null +++ b/0022-Fix-virtio-net-pci-vectors-compat.patch @@ -0,0 +1,45 @@ +From f9643b6934657292aae0b830627b1e5f9b8cbaa1 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Tue, 19 Oct 2021 13:17:06 -0400 +Subject: Fix virtio-net-pci* "vectors" compat + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 77: 8.6/6.2 mt fixes +RH-Commit: [21/23] 8ad581932275d2698a99f31bec40b14f1dbd3d2e +RH-Bugzilla: 2026443 +RH-Acked-by: Miroslav Rezanina + +hw_compat_rhel_8_4 has an issue: it affects only "virtio-net-pci" +but not "virtio-net-pci-transitional" and +"virtio-net-pci-non-transitional". The solution is to use the +"virtio-net-pci-base" type in compat_props. + +An equivalent fix will be submitted for hw_compat_5_2 upstream. 
+ +Signed-off-by: Eduardo Habkost +(cherry picked from commit d45823ab0d0138b2fbaf2ed1e1896d2052f3ccb3) +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 736c765c30..024b025fc2 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -71,7 +71,11 @@ GlobalProperty hw_compat_rhel_8_4[] = { + /* hw_compat_rhel_8_4 from hw_compat_5_2 */ + { "virtio-blk-device", "report-discard-granularity", "off" }, + /* hw_compat_rhel_8_4 from hw_compat_5_2 */ +- { "virtio-net-pci", "vectors", "3"}, ++ /* ++ * Upstream incorrectly had "virtio-net-pci" instead of "virtio-net-pci-base", ++ * (https://bugzilla.redhat.com/show_bug.cgi?id=1999141) ++ */ ++ { "virtio-net-pci-base", "vectors", "3"}, + }; + const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); + +-- +2.27.0 + diff --git a/0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch b/0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch new file mode 100644 index 0000000..8572d61 --- /dev/null +++ b/0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch @@ -0,0 +1,73 @@ +From 7ad8814e583dcc7dc23e3e8398570243b8f176a1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 23 Nov 2021 17:57:42 +0000 +Subject: x86/rhel machine types: Add pc_rhel_8_5_compat + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 77: 8.6/6.2 mt fixes +RH-Commit: [22/23] 8bf555c5d78f344b97ffd5c888c7a7bed592d9d0 +RH-Bugzilla: 2026443 +RH-Acked-by: Miroslav Rezanina + +Add pc_rhel_8_5_compat as the merge of pc_compat_6_1 and pc_compat_6_0 +(since 8.5 was based on 6.0). + +Note, x-keep-pci-slot-hpc flipped back and forward, leaving it out +looks like it leaves us with the original. + +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc.c | 21 +++++++++++++++++++++ + include/hw/i386/pc.h | 3 +++ + 2 files changed, 24 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index e8109954ca..4c08a1971c 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -387,6 +387,27 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_5_compat[] = { ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "model", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, ++ ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-major", "0x0006" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" }, ++}; ++const size_t pc_rhel_8_5_compat_len = G_N_ELEMENTS(pc_rhel_8_5_compat); ++ + GlobalProperty pc_rhel_8_4_compat[] = { + /* pc_rhel_8_4_compat from pc_compat_5_2 */ + { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index d0544ee119..9e8bfb69f8 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -286,6 +286,9 @@ extern const size_t pc_compat_1_4_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_5_compat[]; ++extern const size_t pc_rhel_8_5_compat_len; ++ + extern GlobalProperty pc_rhel_8_4_compat[]; + extern const size_t pc_rhel_8_4_compat_len; + +-- +2.27.0 + diff --git 
a/0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch b/0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch new file mode 100644 index 0000000..4acfa88 --- /dev/null +++ b/0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch @@ -0,0 +1,54 @@ +From 7bd99eebadfdbea6a76585b526e7cab1ee8b1fde Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 23 Nov 2021 18:07:49 +0000 +Subject: x86/rhel machine types: Wire compat into q35 and i440fx + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 77: 8.6/6.2 mt fixes +RH-Commit: [23/23] fc3861aeccc943b434231193ef45ffbc0b3cf6c6 +RH-Bugzilla: 2026443 +RH-Acked-by: Miroslav Rezanina + +Wire the pc_rhel_8_5 compat data into both piix and q35 +to keep the existing machine types compatible. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc_piix.c | 4 ++++ + hw/i386/pc_q35.c | 4 ++++ + 2 files changed, 8 insertions(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 2885edffe9..37fab00733 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1040,6 +1040,10 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_4, + hw_compat_rhel_8_4_len); + compat_props_add(m->compat_props, pc_rhel_8_4_compat, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index c67418b6a9..78876e1101 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -658,6 +658,10 @@ static void pc_q35_machine_rhel850_options(MachineClass *m) + m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.5.0"; ++ compat_props_add(m->compat_props, 
hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); + } + + DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, +-- +2.27.0 + diff --git a/0025-redhat-Add-s390x-machine-type-compatibility-handling.patch b/0025-redhat-Add-s390x-machine-type-compatibility-handling.patch new file mode 100644 index 0000000..1ae8a99 --- /dev/null +++ b/0025-redhat-Add-s390x-machine-type-compatibility-handling.patch @@ -0,0 +1,58 @@ +From 265a57f2955b7f0b65e3f57f89aa1ff2541d3f73 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 26 Nov 2021 09:37:11 +0100 +Subject: redhat: Add s390x machine type compatibility handling for the rebase + to v6.2 + +RH-Author: Thomas Huth +RH-MergeRequest: 80: Add s390x machine type compatibility handling for the rebase to v6.2 +RH-Commit: [26/26] c45cf594604f6dd23954696b9c84d2025e328d11 +RH-Bugzilla: 2022602 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cornelia Huck + +Add compatibility handling for the rhel8.5.0 machine type (and +recursively older, of course). 
+ +Based on the following upstream commits: + + 463e50da8b - s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z14 GA2 + 30e398f796 - s390x/cpumodel: Add more feature to gen16 default model + 4a0af2930a - machine: Prefer cores over sockets in smp parsing since 6.2 + 2b52619994 - machine: Move smp_prefer_sockets to struct SMPCompatProps + +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 181856e6cf..cf13c457d6 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1105,11 +1105,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); + + static void ccw_machine_rhel850_instance_options(MachineState *machine) + { ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; ++ ++ s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); ++ ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_BEAR_ENH); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_RDP); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAI); + } + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ mc->smp_props.prefer_sockets = true; + } + DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); + +-- +2.27.0 + diff --git a/81-kvm-rhel.rules b/81-kvm-rhel.rules new file mode 100644 index 0000000..787cad6 --- /dev/null +++ b/81-kvm-rhel.rules @@ -0,0 +1 @@ +DEVPATH=="*/kvm", ACTION=="change", RUN+="/lib/udev/udev-kvm-check $env{COUNT} $env{EVENT}" diff --git a/85-kvm.preset b/85-kvm.preset new file mode 100644 index 0000000..8024052 --- /dev/null +++ b/85-kvm.preset @@ -0,0 +1,5 @@ +# Enable kvm-setup by default. 
This can have odd side effects on +# PowerNV systems that aren't intended as KVM hosts, but at present we +# only support RHEL on PowerNV for the purpose of being a RHEV host. + +enable kvm-setup.service diff --git a/95-kvm-memlock.conf b/95-kvm-memlock.conf new file mode 100644 index 0000000..fc59dbe --- /dev/null +++ b/95-kvm-memlock.conf @@ -0,0 +1,10 @@ +# The KVM HV implementation on Power can require a significant amount +# of unswappable memory (about half of which also needs to be host +# physically contiguous) to hold the guest's Hash Page Table (HPT) - +# roughly 1/64th of the guest's RAM size, minimum 16MiB. +# +# These limits allow unprivileged users to start smallish VMs, such as +# those used by libguestfs. +# +* hard memlock 65536 +* soft memlock 65536 diff --git a/99-qemu-guest-agent.rules b/99-qemu-guest-agent.rules new file mode 100644 index 0000000..8a290ab --- /dev/null +++ b/99-qemu-guest-agent.rules @@ -0,0 +1,2 @@ +SUBSYSTEM=="virtio-ports", ATTR{name}=="org.qemu.guest_agent.0", \ + TAG+="systemd" ENV{SYSTEMD_WANTS}="qemu-guest-agent.service" diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..15f93e6 --- /dev/null +++ b/README.rst @@ -0,0 +1,19 @@ +==================== +qemu-kvm development +==================== + +qemu-kvm is maintained in a `source tree`_ rather than directly in dist-git. +This provides a way to develop using a regular source code structure and a +way to generate an SRPM and build using the koji service. In addition, it allows local builds using a +CentOS 9 Stream specific configuration. + +Developers deliver all changes to source-git using merge requests. Only maintainers +will push changes sent to source-git to dist-git. + +Each release in dist-git is tagged in the source repository so you can easily +check out the source tree for a build. The tags are in the format +name-version-release, but note release doesn't contain the dist tag since the +source can be built in different build roots (Fedora, CentOS, etc.)
+ +.. _source tree: https://gitlab.com/redhat/centos-stream/src/qemu-kvm + diff --git a/README.tests b/README.tests new file mode 100644 index 0000000..9932773 --- /dev/null +++ b/README.tests @@ -0,0 +1,39 @@ +qemu-kvm-tests README +===================== + +The qemu-kvm-tests rpm contains tests that can be used to verify the +functionality of the installed qemu-kvm package + +When installed, the files from this rpm will be arranged in the following +directory structure + +tests-src/ +├── README +├── scripts +│   ├── qemu.py +│   └── qmp +└── tests + ├── acceptance + ├── Makefile.include + └── qemu-iotests + +The tests/ directory within the tests-src/ directory is setup to remain a copy +of a subset of the tests/ directory from the QEMU source tree + +The avocado_qemu tests and qemu-iotests, along with files required for the +execution of the avocado_qemu tests (scripts/qemu.py and scripts/qmp/) will be +installed in a new location - /usr/lib64/qemu-kvm/tests-src/ + +avocado_qemu tests: +The avocado_qemu tests can be executed by running the following avocado command: +avocado run -p qemu_bin=/usr/libexec/qemu-kvm /usr/lib64/qemu-kvm/tests/acceptance/ +Avocado needs to be installed separately using either pip or from source as +Avocado is not being packaged for RHEL-8. + +qemu-iotests: +symlinks to corresponding binaries need to be created for QEMU_PROG, +QEMU_IO_PROG, QEMU_IMG_PROG, and QEMU_NBD_PROG before the iotests can be +executed. + +The primary purpose of this package is to make these tests available to be +executed as gating tests for the virt module in the RHEL-8 OSCI environment. 
diff --git a/bridge.conf b/bridge.conf new file mode 100644 index 0000000..a573665 --- /dev/null +++ b/bridge.conf @@ -0,0 +1 @@ +allow virbr0 diff --git a/gating.yaml b/gating.yaml new file mode 100644 index 0000000..8d17eb8 --- /dev/null +++ b/gating.yaml @@ -0,0 +1,9 @@ +# recipients: kvmqe-ci, yfu +--- !Policy +product_versions: + - rhel-9 +decision_context: osci_compose_gate +subject_type: brew-build +rules: + - !PassingTestCaseRule {test_case_name: kvm-ci.qemu-kvm.x86_64-intel.brew-build.gating.tier1.functional} + - !PassingTestCaseRule {test_case_name: kvm-ci.qemu-kvm.x86_64-amd.brew-build.gating.tier1.functional} diff --git a/ksm.service b/ksm.service new file mode 100644 index 0000000..35c6f1d --- /dev/null +++ b/ksm.service @@ -0,0 +1,13 @@ +[Unit] +Description=Kernel Samepage Merging +ConditionPathExists=/sys/kernel/mm/ksm + +[Service] +Type=oneshot +RemainAfterExit=yes +EnvironmentFile=-/etc/sysconfig/ksm +ExecStart=/usr/libexec/ksmctl start +ExecStop=/usr/libexec/ksmctl stop + +[Install] +WantedBy=multi-user.target diff --git a/ksm.sysconfig b/ksm.sysconfig new file mode 100644 index 0000000..d99656d --- /dev/null +++ b/ksm.sysconfig @@ -0,0 +1,4 @@ +# The maximum number of unswappable kernel pages +# which may be allocated by ksm (0 for unlimited) +# If unset, defaults to half of total memory +# KSM_MAX_KERNEL_PAGES= diff --git a/ksmctl.c b/ksmctl.c new file mode 100644 index 0000000..af39591 --- /dev/null +++ b/ksmctl.c @@ -0,0 +1,77 @@ +/* Start/stop KSM, for systemd. + * Copyright (C) 2009, 2011 Red Hat, Inc. + * Written by Paolo Bonzini . + * Based on the original sysvinit script by Dan Kenigsberg + * This file is distributed under the GNU General Public License, version 2 + * or later. 
*/ +#include +#include +#include +#include +#include +#include + +#define KSM_MAX_KERNEL_PAGES_FILE "/sys/kernel/mm/ksm/max_kernel_pages" +#define KSM_RUN_FILE "/sys/kernel/mm/ksm/run" + +char *program_name; + +int usage(void) +{ + fprintf(stderr, "Usage: %s {start|stop}\n", program_name); + return 1; +} + +int write_value(uint64_t value, char *filename) +{ + FILE *fp; + if (!(fp = fopen(filename, "w")) || + fprintf(fp, "%llu\n", (unsigned long long) value) == EOF || + fflush(fp) == EOF || + fclose(fp) == EOF) + return 1; + + return 0; +} + +uint64_t ksm_max_kernel_pages() +{ + char *var = getenv("KSM_MAX_KERNEL_PAGES"); + char *endptr; + uint64_t value; + if (var && *var) { + value = strtoll(var, &endptr, 0); + if (value < LLONG_MAX && !*endptr) + return value; + } + /* Unless KSM_MAX_KERNEL_PAGES is set, let KSM munch up to half of + * total memory. */ + return sysconf(_SC_PHYS_PAGES) / 2; +} + +int start(void) +{ + if (access(KSM_MAX_KERNEL_PAGES_FILE, R_OK) >= 0) + write_value(ksm_max_kernel_pages(), KSM_MAX_KERNEL_PAGES_FILE); + return write_value(1, KSM_RUN_FILE); +} + +int stop(void) +{ + return write_value(0, KSM_RUN_FILE); +} + +int main(int argc, char **argv) +{ + program_name = argv[0]; + if (argc < 2) { + return usage(); + } else if (!strcmp(argv[1], "start")) { + return start(); + } else if (!strcmp(argv[1], "stop")) { + return stop(); + } else { + return usage(); + } +} diff --git a/ksmtuned b/ksmtuned new file mode 100644 index 0000000..7bc5743 --- /dev/null +++ b/ksmtuned @@ -0,0 +1,139 @@ +#!/bin/bash +# +# Copyright 2009 Red Hat, Inc. and/or its affiliates. +# Released under the GPL +# +# Author: Dan Kenigsberg +# +# ksmtuned - a simple script that controls whether (and with what vigor) ksm +# should search for duplicated pages. +# +# starts ksm when memory committed to qemu processes exceeds a threshold, and +# makes ksm work harder and harder until memory load falls below that +# threshold.
+# +# send SIGUSR1 to this process right after a new qemu process is started, or +# following its death, to retune ksm accordingly +# +# needs testing and ironing. contact danken@redhat.com if something breaks. + +if [ -f /etc/ksmtuned.conf ]; then + . /etc/ksmtuned.conf +fi + +debug() { + if [ -n "$DEBUG" ]; then + s="`/bin/date`: $*" + [ -n "$LOGFILE" ] && echo "$s" >> "$LOGFILE" || echo "$s" + fi +} + + +KSM_MONITOR_INTERVAL=${KSM_MONITOR_INTERVAL:-60} +KSM_NPAGES_BOOST=${KSM_NPAGES_BOOST:-300} +KSM_NPAGES_DECAY=${KSM_NPAGES_DECAY:--50} + +KSM_NPAGES_MIN=${KSM_NPAGES_MIN:-64} +KSM_NPAGES_MAX=${KSM_NPAGES_MAX:-1250} +# millisecond sleep between ksm scans for 16Gb server. Smaller servers sleep +# more, bigger sleep less. +KSM_SLEEP_MSEC=${KSM_SLEEP_MSEC:-10} + +KSM_THRES_COEF=${KSM_THRES_COEF:-20} +KSM_THRES_CONST=${KSM_THRES_CONST:-2048} + +total=`awk '/^MemTotal:/ {print $2}' /proc/meminfo` +debug total $total + +npages=0 +sleep=$[KSM_SLEEP_MSEC * 16 * 1024 * 1024 / total] +[ $sleep -le 10 ] && sleep=10 +debug sleep $sleep +thres=$[total * KSM_THRES_COEF / 100] +if [ $KSM_THRES_CONST -gt $thres ]; then + thres=$KSM_THRES_CONST +fi +debug thres $thres + +KSMCTL () { + case x$1 in + xstop) + echo 0 > /sys/kernel/mm/ksm/run + ;; + xstart) + echo $2 > /sys/kernel/mm/ksm/pages_to_scan + echo $3 > /sys/kernel/mm/ksm/sleep_millisecs + echo 1 > /sys/kernel/mm/ksm/run + ;; + esac +} + +committed_memory () { + # calculate how much memory is committed to running qemu processes + local pidlist + pidlist=$(pgrep -d ' ' -- '^qemu(-(kvm|system-.+)|:.{1,11})$') + if [ -n "$pidlist" ]; then + ps -p "$pidlist" -o rsz= + fi | awk '{ sum += $1 }; END { print 0+sum }' +} + +free_memory () { + awk '/^(MemFree|Buffers|Cached):/ {free += $2}; END {print free}' \ + /proc/meminfo +} + +increase_npages() { + local delta + delta=${1:-0} + npages=$[npages + delta] + if [ $npages -lt $KSM_NPAGES_MIN ]; then + npages=$KSM_NPAGES_MIN + elif [ $npages -gt $KSM_NPAGES_MAX ]; then + 
npages=$KSM_NPAGES_MAX + fi + echo $npages +} + + +adjust () { + local free committed + free=`free_memory` + committed=`committed_memory` + debug committed $committed free $free + if [ $[committed + thres] -lt $total -a $free -gt $thres ]; then + KSMCTL stop + debug "$[committed + thres] < $total and free > $thres, stop ksm" + return 1 + fi + debug "$[committed + thres] > $total, start ksm" + if [ $free -lt $thres ]; then + npages=`increase_npages $KSM_NPAGES_BOOST` + debug "$free < $thres, boost" + else + npages=`increase_npages $KSM_NPAGES_DECAY` + debug "$free > $thres, decay" + fi + KSMCTL start $npages $sleep + debug "KSMCTL start $npages $sleep" + return 0 +} + +function nothing () { + : +} + +loop () { + trap nothing SIGUSR1 + while true + do + sleep $KSM_MONITOR_INTERVAL & + wait $! + adjust + done +} + +PIDFILE=${PIDFILE-/var/run/ksmtune.pid} +if touch "$PIDFILE"; then + loop & + echo $! > "$PIDFILE" +fi diff --git a/ksmtuned.conf b/ksmtuned.conf new file mode 100644 index 0000000..fc4518c --- /dev/null +++ b/ksmtuned.conf @@ -0,0 +1,21 @@ +# Configuration file for ksmtuned. + +# How long ksmtuned should sleep between tuning adjustments +# KSM_MONITOR_INTERVAL=60 + +# Millisecond sleep between ksm scans for 16Gb server. +# Smaller servers sleep more, bigger sleep less. 
+# KSM_SLEEP_MSEC=10 + +# KSM_NPAGES_BOOST=300 +# KSM_NPAGES_DECAY=-50 +# KSM_NPAGES_MIN=64 +# KSM_NPAGES_MAX=1250 + +# KSM_THRES_COEF=20 +# KSM_THRES_CONST=2048 + +# uncomment the following if you want ksmtuned debug info + +# LOGFILE=/var/log/ksmtuned +# DEBUG=1 diff --git a/ksmtuned.service b/ksmtuned.service new file mode 100644 index 0000000..39febcc --- /dev/null +++ b/ksmtuned.service @@ -0,0 +1,12 @@ +[Unit] +Description=Kernel Samepage Merging (KSM) Tuning Daemon +After=ksm.service +Requires=ksm.service + +[Service] +ExecStart=/usr/sbin/ksmtuned +ExecReload=/bin/kill -USR1 $MAINPID +Type=forking + +[Install] +WantedBy=multi-user.target diff --git a/kvm-Enable-SGX-RH-Only.patch b/kvm-Enable-SGX-RH-Only.patch new file mode 100644 index 0000000..efc8cac --- /dev/null +++ b/kvm-Enable-SGX-RH-Only.patch @@ -0,0 +1,28 @@ +From db6e042fe4fdc1a1bbf562a46b15d4d8e33e2fa6 Mon Sep 17 00:00:00 2001 +From: Paul Lai +Date: Tue, 25 Jan 2022 15:16:22 -0500 +Subject: [PATCH 4/7] Enable SGX -- RH Only + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [4/5] cea874f29984897ef1232fb7749c13203c888034 +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index ddf036f042..fdbbdf9742 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -102,3 +102,4 @@ CONFIG_TPM_CRB=y + CONFIG_TPM_TIS_ISA=y + CONFIG_TPM_EMULATOR=y + CONFIG_TPM_PASSTHROUGH=y ++CONFIG_SGX=y +-- +2.27.0 + diff --git a/kvm-Enable-virtio-iommu-pci-on-aarch64.patch b/kvm-Enable-virtio-iommu-pci-on-aarch64.patch new file mode 100644 index 0000000..3aafd3c --- /dev/null +++ b/kvm-Enable-virtio-iommu-pci-on-aarch64.patch @@ -0,0 
+1,41 @@ +From 85781b8745fa1581a66f64011d61a4f0c4e103dc Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 6 May 2022 17:03:11 +0200 +Subject: [PATCH 3/5] Enable virtio-iommu-pci on aarch64 + +RH-Author: Eric Auger +RH-MergeRequest: 83: Enable virtio-iommu-pci on aarch64 +RH-Commit: [1/1] 23e5c0832e52c66adf5fd6daccdc3edddc7ecb8b (eauger1/centos-qemu-kvm) +RH-Bugzilla: 1477099 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477099 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45128798 +Upstream Status: RHEL-only +Tested: With virtio-net-pci and virtio-block-pci + +let's enable the virtio-iommu-pci device on aarch64 by +turning CONFIG_VIRTIO_IOMMU on. + +Signed-off-by: Eric Auger +--- + configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +index 187938573f..1618d31b89 100644 +--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -23,6 +23,7 @@ CONFIG_VFIO_PCI=y + CONFIG_VIRTIO_MMIO=y + CONFIG_VIRTIO_PCI=y + CONFIG_VIRTIO_MEM=y ++CONFIG_VIRTIO_IOMMU=y + CONFIG_XIO3130=y + CONFIG_NVDIMM=y + CONFIG_ACPI_APEI=y +-- +2.31.1 + diff --git a/kvm-Enable-virtio-iommu-pci-on-x86_64.patch b/kvm-Enable-virtio-iommu-pci-on-x86_64.patch new file mode 100644 index 0000000..2eb24df --- /dev/null +++ b/kvm-Enable-virtio-iommu-pci-on-x86_64.patch @@ -0,0 +1,41 @@ +From c531a39171201f8a1d063e6af752e5d629c1b4bf Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 9 Jun 2022 11:35:18 +0200 +Subject: [PATCH 4/6] Enable virtio-iommu-pci on x86_64 + +RH-Author: Eric Auger +RH-MergeRequest: 100: Enable virtio-iommu-pci on x86_64 +RH-Commit: [1/1] a164af477efc7cb9d3d76a0e644f198f7c9fb2b5 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2094252 
+RH-Acked-by: Miroslav Rezanina +RH-Acked-by: MST +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094252 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871185 +Upstream Status: RHEL-only +Tested: With virtio-net-pci and virtio-block-pci + +let's enable the virtio-iommu-pci device on x86_64 by +turning CONFIG_VIRTIO_IOMMU on. + +Signed-off-by: Eric Auger +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index d0c9e66641..3850b9de72 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -90,6 +90,7 @@ CONFIG_VHOST_USER_BLK=y + CONFIG_VIRTIO_MEM=y + CONFIG_VIRTIO_PCI=y + CONFIG_VIRTIO_VGA=y ++CONFIG_VIRTIO_IOMMU=y + CONFIG_VMMOUSE=y + CONFIG_VMPORT=y + CONFIG_VTD=y +-- +2.31.1 + diff --git a/kvm-Introduce-event-loop-base-abstract-class.patch b/kvm-Introduce-event-loop-base-abstract-class.patch new file mode 100644 index 0000000..9f987ea --- /dev/null +++ b/kvm-Introduce-event-loop-base-abstract-class.patch @@ -0,0 +1,503 @@ +From 1163da281c178359dd7e1cf1ced5c98caa600f8e Mon Sep 17 00:00:00 2001 +From: Nicolas Saenz Julienne +Date: Mon, 25 Apr 2022 09:57:21 +0200 +Subject: [PATCH 01/16] Introduce event-loop-base abstract class + +RH-Author: Nicolas Saenz Julienne +RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size +RH-Commit: [1/3] 5817205d8f56cc4aa98bd5963ecac54a59bad990 +RH-Bugzilla: 2031024 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +Introduce the 'event-loop-base' abstract class, it'll hold the +properties common to all event loops and provide the necessary hooks for +their creation and maintenance. Then have iothread inherit from it. 
+ +EventLoopBaseClass is defined as user creatable and provides a hook for +its children to attach themselves to the user creatable class 'complete' +function. It also provides an update_params() callback to propagate +property changes onto its children. + +The new 'event-loop-base' class will live in the root directory. It is +built on its own using the 'link_whole' option (there are no direct +function dependencies between the class and its children, it all happens +through 'constructor' magic). And also imposes new compilation +dependencies: + + qom <- event-loop-base <- blockdev (iothread.c) + +And in subsequent patches: + + qom <- event-loop-base <- qemuutil (util/main-loop.c) + +All this forced some amount of reordering in meson.build: + + - Moved qom build definition before qemuutil. Doing it the other way + around (i.e. moving qemuutil after qom) isn't possible as a lot of + core libraries that live in between the two depend on it. + + - Process the 'hw' subdir earlier, as it introduces files into the + 'qom' source set. + +No functional changes intended.
+ +Signed-off-by: Nicolas Saenz Julienne +Reviewed-by: Stefan Hajnoczi +Acked-by: Markus Armbruster +Message-id: 20220425075723.20019-2-nsaenzju@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 7d5983e3c8c40b1d0668faba31d79905c4fadd7d) +--- + event-loop-base.c | 104 +++++++++++++++++++++++++++++++ + include/sysemu/event-loop-base.h | 36 +++++++++++ + include/sysemu/iothread.h | 6 +- + iothread.c | 65 ++++++------------- + meson.build | 23 ++++--- + qapi/qom.json | 22 +++++-- + 6 files changed, 192 insertions(+), 64 deletions(-) + create mode 100644 event-loop-base.c + create mode 100644 include/sysemu/event-loop-base.h + +diff --git a/event-loop-base.c b/event-loop-base.c +new file mode 100644 +index 0000000000..a924c73a7c +--- /dev/null ++++ b/event-loop-base.c +@@ -0,0 +1,104 @@ ++/* ++ * QEMU event-loop base ++ * ++ * Copyright (C) 2022 Red Hat Inc ++ * ++ * Authors: ++ * Stefan Hajnoczi ++ * Nicolas Saenz Julienne ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ */ ++ ++#include "qemu/osdep.h" ++#include "qom/object_interfaces.h" ++#include "qapi/error.h" ++#include "sysemu/event-loop-base.h" ++ ++typedef struct { ++ const char *name; ++ ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */ ++} EventLoopBaseParamInfo; ++ ++static EventLoopBaseParamInfo aio_max_batch_info = { ++ "aio-max-batch", offsetof(EventLoopBase, aio_max_batch), ++}; ++ ++static void event_loop_base_get_param(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ EventLoopBase *event_loop_base = EVENT_LOOP_BASE(obj); ++ EventLoopBaseParamInfo *info = opaque; ++ int64_t *field = (void *)event_loop_base + info->offset; ++ ++ visit_type_int64(v, name, field, errp); ++} ++ ++static void event_loop_base_set_param(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(obj); ++ EventLoopBase *base = EVENT_LOOP_BASE(obj); ++ EventLoopBaseParamInfo *info = opaque; ++ int64_t *field = (void *)base + info->offset; ++ int64_t value; ++ ++ if (!visit_type_int64(v, name, &value, errp)) { ++ return; ++ } ++ ++ if (value < 0) { ++ error_setg(errp, "%s value must be in range [0, %" PRId64 "]", ++ info->name, INT64_MAX); ++ return; ++ } ++ ++ *field = value; ++ ++ if (bc->update_params) { ++ bc->update_params(base, errp); ++ } ++ ++ return; ++} ++ ++static void event_loop_base_complete(UserCreatable *uc, Error **errp) ++{ ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc); ++ EventLoopBase *base = EVENT_LOOP_BASE(uc); ++ ++ if (bc->init) { ++ bc->init(base, errp); ++ } ++} ++ ++static void event_loop_base_class_init(ObjectClass *klass, void *class_data) ++{ ++ UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); ++ ucc->complete = event_loop_base_complete; ++ ++ object_class_property_add(klass, "aio-max-batch", "int", ++ event_loop_base_get_param, ++ event_loop_base_set_param, ++ NULL, &aio_max_batch_info); ++} ++ ++static const TypeInfo 
event_loop_base_info = { ++ .name = TYPE_EVENT_LOOP_BASE, ++ .parent = TYPE_OBJECT, ++ .instance_size = sizeof(EventLoopBase), ++ .class_size = sizeof(EventLoopBaseClass), ++ .class_init = event_loop_base_class_init, ++ .abstract = true, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ ++static void register_types(void) ++{ ++ type_register_static(&event_loop_base_info); ++} ++type_init(register_types); +diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h +new file mode 100644 +index 0000000000..8e77d8b69f +--- /dev/null ++++ b/include/sysemu/event-loop-base.h +@@ -0,0 +1,36 @@ ++/* ++ * QEMU event-loop backend ++ * ++ * Copyright (C) 2022 Red Hat Inc ++ * ++ * Authors: ++ * Nicolas Saenz Julienne ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++#ifndef QEMU_EVENT_LOOP_BASE_H ++#define QEMU_EVENT_LOOP_BASE_H ++ ++#include "qom/object.h" ++#include "block/aio.h" ++#include "qemu/typedefs.h" ++ ++#define TYPE_EVENT_LOOP_BASE "event-loop-base" ++OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass, ++ EVENT_LOOP_BASE) ++ ++struct EventLoopBaseClass { ++ ObjectClass parent_class; ++ ++ void (*init)(EventLoopBase *base, Error **errp); ++ void (*update_params)(EventLoopBase *base, Error **errp); ++}; ++ ++struct EventLoopBase { ++ Object parent; ++ ++ /* AioContext AIO engine parameters */ ++ int64_t aio_max_batch; ++}; ++#endif +diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h +index 7f714bd136..8f8601d6ab 100644 +--- a/include/sysemu/iothread.h ++++ b/include/sysemu/iothread.h +@@ -17,11 +17,12 @@ + #include "block/aio.h" + #include "qemu/thread.h" + #include "qom/object.h" ++#include "sysemu/event-loop-base.h" + + #define TYPE_IOTHREAD "iothread" + + struct IOThread { +- Object parent_obj; ++ EventLoopBase parent_obj; + + QemuThread thread; + AioContext *ctx; +@@ -37,9 +38,6 @@ struct 
IOThread { + int64_t poll_max_ns; + int64_t poll_grow; + int64_t poll_shrink; +- +- /* AioContext AIO engine parameters */ +- int64_t aio_max_batch; + }; + typedef struct IOThread IOThread; + +diff --git a/iothread.c b/iothread.c +index 0f98af0f2a..8fa2f3bfb8 100644 +--- a/iothread.c ++++ b/iothread.c +@@ -17,6 +17,7 @@ + #include "qemu/module.h" + #include "block/aio.h" + #include "block/block.h" ++#include "sysemu/event-loop-base.h" + #include "sysemu/iothread.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-misc.h" +@@ -152,10 +153,15 @@ static void iothread_init_gcontext(IOThread *iothread) + iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE); + } + +-static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) ++static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp) + { ++ IOThread *iothread = IOTHREAD(base); + ERRP_GUARD(); + ++ if (!iothread->ctx) { ++ return; ++ } ++ + aio_context_set_poll_params(iothread->ctx, + iothread->poll_max_ns, + iothread->poll_grow, +@@ -166,14 +172,15 @@ static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) + } + + aio_context_set_aio_params(iothread->ctx, +- iothread->aio_max_batch, ++ iothread->parent_obj.aio_max_batch, + errp); + } + +-static void iothread_complete(UserCreatable *obj, Error **errp) ++ ++static void iothread_init(EventLoopBase *base, Error **errp) + { + Error *local_error = NULL; +- IOThread *iothread = IOTHREAD(obj); ++ IOThread *iothread = IOTHREAD(base); + char *thread_name; + + iothread->stopping = false; +@@ -189,7 +196,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp) + */ + iothread_init_gcontext(iothread); + +- iothread_set_aio_context_params(iothread, &local_error); ++ iothread_set_aio_context_params(base, &local_error); + if (local_error) { + error_propagate(errp, local_error); + aio_context_unref(iothread->ctx); +@@ -201,7 +208,7 @@ static void iothread_complete(UserCreatable *obj, Error 
**errp) + * to inherit. + */ + thread_name = g_strdup_printf("IO %s", +- object_get_canonical_path_component(OBJECT(obj))); ++ object_get_canonical_path_component(OBJECT(base))); + qemu_thread_create(&iothread->thread, thread_name, iothread_run, + iothread, QEMU_THREAD_JOINABLE); + g_free(thread_name); +@@ -226,9 +233,6 @@ static IOThreadParamInfo poll_grow_info = { + static IOThreadParamInfo poll_shrink_info = { + "poll-shrink", offsetof(IOThread, poll_shrink), + }; +-static IOThreadParamInfo aio_max_batch_info = { +- "aio-max-batch", offsetof(IOThread, aio_max_batch), +-}; + + static void iothread_get_param(Object *obj, Visitor *v, + const char *name, IOThreadParamInfo *info, Error **errp) +@@ -288,35 +292,12 @@ static void iothread_set_poll_param(Object *obj, Visitor *v, + } + } + +-static void iothread_get_aio_param(Object *obj, Visitor *v, +- const char *name, void *opaque, Error **errp) +-{ +- IOThreadParamInfo *info = opaque; +- +- iothread_get_param(obj, v, name, info, errp); +-} +- +-static void iothread_set_aio_param(Object *obj, Visitor *v, +- const char *name, void *opaque, Error **errp) +-{ +- IOThread *iothread = IOTHREAD(obj); +- IOThreadParamInfo *info = opaque; +- +- if (!iothread_set_param(obj, v, name, info, errp)) { +- return; +- } +- +- if (iothread->ctx) { +- aio_context_set_aio_params(iothread->ctx, +- iothread->aio_max_batch, +- errp); +- } +-} +- + static void iothread_class_init(ObjectClass *klass, void *class_data) + { +- UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); +- ucc->complete = iothread_complete; ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(klass); ++ ++ bc->init = iothread_init; ++ bc->update_params = iothread_set_aio_context_params; + + object_class_property_add(klass, "poll-max-ns", "int", + iothread_get_poll_param, +@@ -330,23 +311,15 @@ static void iothread_class_init(ObjectClass *klass, void *class_data) + iothread_get_poll_param, + iothread_set_poll_param, + NULL, &poll_shrink_info); +- 
object_class_property_add(klass, "aio-max-batch", "int", +- iothread_get_aio_param, +- iothread_set_aio_param, +- NULL, &aio_max_batch_info); + } + + static const TypeInfo iothread_info = { + .name = TYPE_IOTHREAD, +- .parent = TYPE_OBJECT, ++ .parent = TYPE_EVENT_LOOP_BASE, + .class_init = iothread_class_init, + .instance_size = sizeof(IOThread), + .instance_init = iothread_instance_init, + .instance_finalize = iothread_instance_finalize, +- .interfaces = (InterfaceInfo[]) { +- {TYPE_USER_CREATABLE}, +- {} +- }, + }; + + static void iothread_register_types(void) +@@ -383,7 +356,7 @@ static int query_one_iothread(Object *object, void *opaque) + info->poll_max_ns = iothread->poll_max_ns; + info->poll_grow = iothread->poll_grow; + info->poll_shrink = iothread->poll_shrink; +- info->aio_max_batch = iothread->aio_max_batch; ++ info->aio_max_batch = iothread->parent_obj.aio_max_batch; + + QAPI_LIST_APPEND(*tail, info); + return 0; +diff --git a/meson.build b/meson.build +index 6f7e430f0f..b9c919a55e 100644 +--- a/meson.build ++++ b/meson.build +@@ -2804,6 +2804,7 @@ subdir('qom') + subdir('authz') + subdir('crypto') + subdir('ui') ++subdir('hw') + + + if enable_modules +@@ -2811,6 +2812,18 @@ if enable_modules + modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO') + endif + ++qom_ss = qom_ss.apply(config_host, strict: false) ++libqom = static_library('qom', qom_ss.sources() + genh, ++ dependencies: [qom_ss.dependencies()], ++ name_suffix: 'fa') ++qom = declare_dependency(link_whole: libqom) ++ ++event_loop_base = files('event-loop-base.c') ++event_loop_base = static_library('event-loop-base', sources: event_loop_base + genh, ++ build_by_default: true) ++event_loop_base = declare_dependency(link_whole: event_loop_base, ++ dependencies: [qom]) ++ + stub_ss = stub_ss.apply(config_all, strict: false) + + util_ss.add_all(trace_ss) +@@ -2897,7 +2910,6 @@ subdir('monitor') + subdir('net') + subdir('replay') + subdir('semihosting') 
+-subdir('hw') + subdir('tcg') + subdir('fpu') + subdir('accel') +@@ -3022,13 +3034,6 @@ qemu_syms = custom_target('qemu.syms', output: 'qemu.syms', + capture: true, + command: [undefsym, nm, '@INPUT@']) + +-qom_ss = qom_ss.apply(config_host, strict: false) +-libqom = static_library('qom', qom_ss.sources() + genh, +- dependencies: [qom_ss.dependencies()], +- name_suffix: 'fa') +- +-qom = declare_dependency(link_whole: libqom) +- + authz_ss = authz_ss.apply(config_host, strict: false) + libauthz = static_library('authz', authz_ss.sources() + genh, + dependencies: [authz_ss.dependencies()], +@@ -3081,7 +3086,7 @@ libblockdev = static_library('blockdev', blockdev_ss.sources() + genh, + build_by_default: false) + + blockdev = declare_dependency(link_whole: [libblockdev], +- dependencies: [block]) ++ dependencies: [block, event_loop_base]) + + qmp_ss = qmp_ss.apply(config_host, strict: false) + libqmp = static_library('qmp', qmp_ss.sources() + genh, +diff --git a/qapi/qom.json b/qapi/qom.json +index eeb5395ff3..a2439533c5 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -499,6 +499,20 @@ + '*repeat': 'bool', + '*grab-toggle': 'GrabToggleKeys' } } + ++## ++# @EventLoopBaseProperties: ++# ++# Common properties for event loops ++# ++# @aio-max-batch: maximum number of requests in a batch for the AIO engine, ++# 0 means that the engine will use its default. ++# (default: 0) ++# ++# Since: 7.1 ++## ++{ 'struct': 'EventLoopBaseProperties', ++ 'data': { '*aio-max-batch': 'int' } } ++ + ## + # @IothreadProperties: + # +@@ -516,17 +530,15 @@ + # algorithm detects it is spending too long polling without + # encountering events. 0 selects a default behaviour (default: 0) + # +-# @aio-max-batch: maximum number of requests in a batch for the AIO engine, +-# 0 means that the engine will use its default +-# (default:0, since 6.1) ++# The @aio-max-batch option is available since 6.1. 
+ # + # Since: 2.0 + ## + { 'struct': 'IothreadProperties', ++ 'base': 'EventLoopBaseProperties', + 'data': { '*poll-max-ns': 'int', + '*poll-grow': 'int', +- '*poll-shrink': 'int', +- '*aio-max-batch': 'int' } } ++ '*poll-shrink': 'int' } } + + ## + # @MemoryBackendProperties: +-- +2.31.1 + diff --git a/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch b/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch new file mode 100644 index 0000000..c7b8898 --- /dev/null +++ b/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch @@ -0,0 +1,420 @@ +From cda3fcf14f2883fea633e25256f6c14a71271adf Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:31 -0300 +Subject: [PATCH 08/18] QIOChannel: Add flags on io_writev and introduce + io_flush callback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [2/11] 06acfb6b0cb2c25733c2eb198011f7623b5a7024 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Add flags to io_writev and introduce io_flush as optional callback to +QIOChannelClass, allowing the implementation of zero copy writes by +subclasses. + +How to use them: +- Write data using qio_channel_writev*(...,QIO_CHANNEL_WRITE_FLAG_ZERO_COPY), +- Wait write completion with qio_channel_flush(). + +Notes: +As some zero copy write implementations work asynchronously, it's +recommended to keep the write buffer untouched until the return of +qio_channel_flush(), to avoid the risk of sending an updated buffer +instead of the buffer state during write. + +As io_flush callback is optional, if a subclass does not implement it, then: +- io_flush will return 0 without changing anything. + +Also, some functions like qio_channel_writev_full_all() were adapted to +receive a flag parameter. 
That allows shared code between zero copy and +non-zero copy writev, and also an easier implementation on new flags. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Message-Id: <20220513062836.965425-3-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit b88651cb4d4fa416fdbb6afaf5b26ec8c035eaad) +Signed-off-by: Leonardo Bras +--- + chardev/char-io.c | 2 +- + hw/remote/mpqemu-link.c | 2 +- + include/io/channel.h | 38 +++++++++++++++++++++- + io/channel-buffer.c | 1 + + io/channel-command.c | 1 + + io/channel-file.c | 1 + + io/channel-socket.c | 2 ++ + io/channel-tls.c | 1 + + io/channel-websock.c | 1 + + io/channel.c | 49 +++++++++++++++++++++++------ + migration/rdma.c | 1 + + scsi/pr-manager-helper.c | 2 +- + tests/unit/test-io-channel-socket.c | 1 + + 13 files changed, 88 insertions(+), 14 deletions(-) + +diff --git a/chardev/char-io.c b/chardev/char-io.c +index 8ced184160..4451128cba 100644 +--- a/chardev/char-io.c ++++ b/chardev/char-io.c +@@ -122,7 +122,7 @@ int io_channel_send_full(QIOChannel *ioc, + + ret = qio_channel_writev_full( + ioc, &iov, 1, +- fds, nfds, NULL); ++ fds, nfds, 0, NULL); + if (ret == QIO_CHANNEL_ERR_BLOCK) { + if (offset) { + return offset; +diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c +index 7e841820e5..e8f556bd27 100644 +--- a/hw/remote/mpqemu-link.c ++++ b/hw/remote/mpqemu-link.c +@@ -69,7 +69,7 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) + } + + if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send), +- fds, nfds, errp)) { ++ fds, nfds, 0, errp)) { + ret = true; + } else { + trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds); +diff --git a/include/io/channel.h b/include/io/channel.h +index 88988979f8..c680ee7480 100644 +--- a/include/io/channel.h ++++ b/include/io/channel.h +@@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, + + #define QIO_CHANNEL_ERR_BLOCK 
-2 + ++#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 ++ + typedef enum QIOChannelFeature QIOChannelFeature; + + enum QIOChannelFeature { + QIO_CHANNEL_FEATURE_FD_PASS, + QIO_CHANNEL_FEATURE_SHUTDOWN, + QIO_CHANNEL_FEATURE_LISTEN, ++ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, + }; + + +@@ -104,6 +107,7 @@ struct QIOChannelClass { + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp); + ssize_t (*io_readv)(QIOChannel *ioc, + const struct iovec *iov, +@@ -136,6 +140,8 @@ struct QIOChannelClass { + IOHandler *io_read, + IOHandler *io_write, + void *opaque); ++ int (*io_flush)(QIOChannel *ioc, ++ Error **errp); + }; + + /* General I/O handling functions */ +@@ -228,6 +234,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: an array of file handles to send + * @nfds: number of file handles in @fds ++ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * Write data to the IO channel, reading it from the +@@ -260,6 +267,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp); + + /** +@@ -837,6 +845,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: an array of file handles to send + * @nfds: number of file handles in @fds ++ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * +@@ -846,6 +855,14 @@ int qio_channel_readv_full_all(QIOChannel *ioc, + * to be written, yielding from the current coroutine + * if required. + * ++ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags, ++ * instead of waiting for all requested data to be written, ++ * this function will wait until it's all queued for writing. ++ * In this case, if the buffer gets changed between queueing and ++ * sending, the updated buffer will be sent. 
If this is not a ++ * desired behavior, it's suggested to call qio_channel_flush() ++ * before reusing the buffer. ++ * + * Returns: 0 if all bytes were written, or -1 on error + */ + +@@ -853,6 +870,25 @@ int qio_channel_writev_full_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, size_t nfds, +- Error **errp); ++ int flags, Error **errp); ++ ++/** ++ * qio_channel_flush: ++ * @ioc: the channel object ++ * @errp: pointer to a NULL-initialized error object ++ * ++ * Will block until every packet queued with ++ * qio_channel_writev_full() + QIO_CHANNEL_WRITE_FLAG_ZERO_COPY ++ * is sent, or return in case of any error. ++ * ++ * If not implemented, acts as a no-op, and returns 0. ++ * ++ * Returns -1 if any error is found, ++ * 1 if every send failed to use zero copy. ++ * 0 otherwise. ++ */ ++ ++int qio_channel_flush(QIOChannel *ioc, ++ Error **errp); + + #endif /* QIO_CHANNEL_H */ +diff --git a/io/channel-buffer.c b/io/channel-buffer.c +index baa4e2b089..bf52011be2 100644 +--- a/io/channel-buffer.c ++++ b/io/channel-buffer.c +@@ -81,6 +81,7 @@ static ssize_t qio_channel_buffer_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); +diff --git a/io/channel-command.c b/io/channel-command.c +index 338da73ade..54560464ae 100644 +--- a/io/channel-command.c ++++ b/io/channel-command.c +@@ -258,6 +258,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); +diff --git a/io/channel-file.c b/io/channel-file.c +index d7cf6d278f..ef6807a6be 100644 +--- a/io/channel-file.c ++++ b/io/channel-file.c +@@ -114,6 +114,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); +diff --git 
a/io/channel-socket.c b/io/channel-socket.c +index 7a8d9f69c9..a1be2197ca 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -525,6 +525,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +@@ -620,6 +621,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +diff --git a/io/channel-tls.c b/io/channel-tls.c +index 2ae1b92fc0..4ce890a538 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -301,6 +301,7 @@ static ssize_t qio_channel_tls_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); +diff --git a/io/channel-websock.c b/io/channel-websock.c +index 55145a6a8c..9619906ac3 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -1127,6 +1127,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); +diff --git a/io/channel.c b/io/channel.c +index e8b019dc36..0640941ac5 100644 +--- a/io/channel.c ++++ b/io/channel.c +@@ -72,18 +72,32 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + +- if ((fds || nfds) && +- !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { ++ if (fds || nfds) { ++ if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { ++ error_setg_errno(errp, EINVAL, ++ "Channel does not support file descriptor passing"); ++ return -1; ++ } ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ error_setg_errno(errp, EINVAL, ++ "Zero Copy does not support file descriptor passing"); 
++ return -1; ++ } ++ } ++ ++ if ((flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) && ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { + error_setg_errno(errp, EINVAL, +- "Channel does not support file descriptor passing"); ++ "Requested Zero Copy feature is not available"); + return -1; + } + +- return klass->io_writev(ioc, iov, niov, fds, nfds, errp); ++ return klass->io_writev(ioc, iov, niov, fds, nfds, flags, errp); + } + + +@@ -217,14 +231,14 @@ int qio_channel_writev_all(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp); ++ return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, 0, errp); + } + + int qio_channel_writev_full_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, size_t nfds, +- Error **errp) ++ int flags, Error **errp) + { + int ret = -1; + struct iovec *local_iov = g_new(struct iovec, niov); +@@ -237,8 +251,10 @@ int qio_channel_writev_full_all(QIOChannel *ioc, + + while (nlocal_iov > 0) { + ssize_t len; +- len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds, +- errp); ++ ++ len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, ++ nfds, flags, errp); ++ + if (len == QIO_CHANNEL_ERR_BLOCK) { + if (qemu_in_coroutine()) { + qio_channel_yield(ioc, G_IO_OUT); +@@ -277,7 +293,7 @@ ssize_t qio_channel_writev(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp); ++ return qio_channel_writev_full(ioc, iov, niov, NULL, 0, 0, errp); + } + + +@@ -297,7 +313,7 @@ ssize_t qio_channel_write(QIOChannel *ioc, + Error **errp) + { + struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; +- return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp); ++ return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, 0, errp); + } + + +@@ -473,6 +489,19 @@ off_t qio_channel_io_seek(QIOChannel *ioc, + return klass->io_seek(ioc, offset, whence, errp); + } + ++int 
qio_channel_flush(QIOChannel *ioc, ++ Error **errp) ++{ ++ QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); ++ ++ if (!klass->io_flush || ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { ++ return 0; ++ } ++ ++ return klass->io_flush(ioc, errp); ++} ++ + + static void qio_channel_restart_read(void *opaque) + { +diff --git a/migration/rdma.c b/migration/rdma.c +index ef1e65ec36..672d1958a9 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -2840,6 +2840,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); +diff --git a/scsi/pr-manager-helper.c b/scsi/pr-manager-helper.c +index 451c7631b7..3be52a98d5 100644 +--- a/scsi/pr-manager-helper.c ++++ b/scsi/pr-manager-helper.c +@@ -77,7 +77,7 @@ static int pr_manager_helper_write(PRManagerHelper *pr_mgr, + iov.iov_base = (void *)buf; + iov.iov_len = sz; + n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1, +- nfds ? &fd : NULL, nfds, errp); ++ nfds ? 
&fd : NULL, nfds, 0, errp); + + if (n_written <= 0) { + assert(n_written != QIO_CHANNEL_ERR_BLOCK); +diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c +index c49eec1f03..6713886d02 100644 +--- a/tests/unit/test-io-channel-socket.c ++++ b/tests/unit/test-io-channel-socket.c +@@ -444,6 +444,7 @@ static void test_io_channel_unix_fd_pass(void) + G_N_ELEMENTS(iosend), + fdsend, + G_N_ELEMENTS(fdsend), ++ 0, + &error_abort); + + qio_channel_readv_full(dst, +-- +2.35.3 + diff --git a/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch b/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch new file mode 100644 index 0000000..9d134e6 --- /dev/null +++ b/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch @@ -0,0 +1,58 @@ +From e70f01749addd7d0b7aa7fa4fdedb664f98e6b9b Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:43 -0300 +Subject: [PATCH 16/18] QIOChannelSocket: Fix zero-copy send so socket flush + works +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [10/11] a2dfac987e24026b1a78e90b86234ca206b6401f (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Somewhere between v6 and v7 of the zero-copy-send patchset a crucial +part of the flushing mechanism went missing: incrementing zero_copy_queued. + +Without that, the flushing interface becomes a no-op, and there is no +guarantee the buffer is really sent. + +This can go as bad as causing a corruption in RAM during migration. + +Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") +Reported-by: 徐闯 +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 4f5a09714c983a3471fd12e3c7f3196e95c650c1) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index 7490e5943d..8ae8b212cf 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -612,6 +612,11 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + "Unable to write to socket"); + return -1; + } ++ ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ sioc->zero_copy_queued++; ++ } ++ + return ret; + } + #else /* WIN32 */ +-- +2.35.3 + diff --git a/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch b/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch new file mode 100644 index 0000000..89aa806 --- /dev/null +++ b/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch @@ -0,0 +1,249 @@ +From 4aeba0365d30dabe2e70dc172683f0878a4a9621 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:32 -0300 +Subject: [PATCH 09/18] QIOChannelSocket: Implement io_writev zero copy flag & + io_flush for CONFIG_LINUX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [3/11] 9afeac1f5ac7675624660a0281726c09c8321180 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +For CONFIG_LINUX, implement the new zero copy flag and the optional callback +io_flush on QIOChannelSocket, but enables it only when MSG_ZEROCOPY +feature is available in the host kernel, which is checked on +qio_channel_socket_connect_sync() + +qio_channel_socket_flush() was implemented by counting how many times +sendmsg(...,MSG_ZEROCOPY) was successfully called, and then reading the +socket's error queue, in order to find how many of them finished sending. 
+Flush will loop until those counters are the same, or until some error occurs. + +Notes on using writev() with QIO_CHANNEL_WRITE_FLAG_ZERO_COPY: +1: Buffer +- As MSG_ZEROCOPY tells the kernel to use the same user buffer to avoid copying, +some caution is necessary to avoid overwriting any buffer before it's sent. +If something like this happens, a newer version of the buffer may be sent instead. +- If this is a problem, it's recommended to call qio_channel_flush() before freeing +or re-using the buffer. + +2: Locked memory +- When using MSG_ZEROCOPY, the buffer memory will be locked after being queued, and +unlocked after it's sent. +- Depending on the size of each buffer, and how often it's sent, it may require +a larger amount of locked memory than usually available to a non-root user. +- If the required amount of locked memory is not available, writev_zero_copy +will return an error, which can abort an operation like migration. +- Because of this, when user code wants to add zero copy as a feature, it +requires a mechanism to disable it, so it can still be accessible to less +privileged users. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Message-Id: <20220513062836.965425-4-leobras@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 2bc58ffc2926a4efdd03edfb5909861fefc68c3d) +Signed-off-by: Leonardo Bras +--- + include/io/channel-socket.h | 2 + + io/channel-socket.c | 116 ++++++++++++++++++++++++++++++++++-- + 2 files changed, 114 insertions(+), 4 deletions(-) + +diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h +index e747e63514..513c428fe4 100644 +--- a/include/io/channel-socket.h ++++ b/include/io/channel-socket.h +@@ -47,6 +47,8 @@ struct QIOChannelSocket { + socklen_t localAddrLen; + struct sockaddr_storage remoteAddr; + socklen_t remoteAddrLen; ++ ssize_t zero_copy_queued; ++ ssize_t zero_copy_sent; + }; + + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index a1be2197ca..fbd2214d20 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -26,6 +26,14 @@ + #include "io/channel-watch.h" + #include "trace.h" + #include "qapi/clone-visitor.h" ++#ifdef CONFIG_LINUX ++#include ++#include ++ ++#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY)) ++#define QEMU_MSG_ZEROCOPY ++#endif ++#endif + + #define SOCKET_MAX_FDS 16 + +@@ -55,6 +63,8 @@ qio_channel_socket_new(void) + + sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)); + sioc->fd = -1; ++ sioc->zero_copy_queued = 0; ++ sioc->zero_copy_sent = 0; + + ioc = QIO_CHANNEL(sioc); + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); +@@ -154,6 +164,16 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, + return -1; + } + ++#ifdef QEMU_MSG_ZEROCOPY ++ int ret, v = 1; ++ ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v)); ++ if (ret == 0) { ++ /* Zero copy available on host */ ++ qio_channel_set_feature(QIO_CHANNEL(ioc), ++ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY); ++ } ++#endif ++ + return 0; + } + +@@ -534,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)]; + size_t fdsize = sizeof(int) * nfds; + struct cmsghdr *cmsg; ++ int sflags = 0; + + 
memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)); + +@@ -558,15 +579,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } + ++#ifdef QEMU_MSG_ZEROCOPY ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ sflags = MSG_ZEROCOPY; ++ } ++#endif ++ + retry: +- ret = sendmsg(sioc->fd, &msg, 0); ++ ret = sendmsg(sioc->fd, &msg, sflags); + if (ret <= 0) { +- if (errno == EAGAIN) { ++ switch (errno) { ++ case EAGAIN: + return QIO_CHANNEL_ERR_BLOCK; +- } +- if (errno == EINTR) { ++ case EINTR: + goto retry; ++#ifdef QEMU_MSG_ZEROCOPY ++ case ENOBUFS: ++ if (sflags & MSG_ZEROCOPY) { ++ error_setg_errno(errp, errno, ++ "Process can't lock enough memory for using MSG_ZEROCOPY"); ++ return -1; ++ } ++ break; ++#endif + } ++ + error_setg_errno(errp, errno, + "Unable to write to socket"); + return -1; +@@ -660,6 +697,74 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + } + #endif /* WIN32 */ + ++ ++#ifdef QEMU_MSG_ZEROCOPY ++static int qio_channel_socket_flush(QIOChannel *ioc, ++ Error **errp) ++{ ++ QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); ++ struct msghdr msg = {}; ++ struct sock_extended_err *serr; ++ struct cmsghdr *cm; ++ char control[CMSG_SPACE(sizeof(*serr))]; ++ int received; ++ int ret = 1; ++ ++ msg.msg_control = control; ++ msg.msg_controllen = sizeof(control); ++ memset(control, 0, sizeof(control)); ++ ++ while (sioc->zero_copy_sent < sioc->zero_copy_queued) { ++ received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE); ++ if (received < 0) { ++ switch (errno) { ++ case EAGAIN: ++ /* Nothing on errqueue, wait until something is available */ ++ qio_channel_wait(ioc, G_IO_ERR); ++ continue; ++ case EINTR: ++ continue; ++ default: ++ error_setg_errno(errp, errno, ++ "Unable to read errqueue"); ++ return -1; ++ } ++ } ++ ++ cm = CMSG_FIRSTHDR(&msg); ++ if (cm->cmsg_level != SOL_IP && ++ cm->cmsg_type != IP_RECVERR) { ++ error_setg_errno(errp, EPROTOTYPE, ++ "Wrong cmsg in errqueue"); ++ 
return -1; ++ } ++ ++ serr = (void *) CMSG_DATA(cm); ++ if (serr->ee_errno != SO_EE_ORIGIN_NONE) { ++ error_setg_errno(errp, serr->ee_errno, ++ "Error on socket"); ++ return -1; ++ } ++ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { ++ error_setg_errno(errp, serr->ee_origin, ++ "Error not from zero copy"); ++ return -1; ++ } ++ ++ /* No errors, count successfully finished sendmsg()*/ ++ sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1; ++ ++ /* If any sendmsg() succeeded using zero copy, return 0 at the end */ ++ if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) { ++ ret = 0; ++ } ++ } ++ ++ return ret; ++} ++ ++#endif /* QEMU_MSG_ZEROCOPY */ ++ + static int + qio_channel_socket_set_blocking(QIOChannel *ioc, + bool enabled, +@@ -790,6 +895,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass, + ioc_klass->io_set_delay = qio_channel_socket_set_delay; + ioc_klass->io_create_watch = qio_channel_socket_create_watch; + ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler; ++#ifdef QEMU_MSG_ZEROCOPY ++ ioc_klass->io_flush = qio_channel_socket_flush; ++#endif + } + + static const TypeInfo qio_channel_socket_info = { +-- +2.35.3 + diff --git a/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch b/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch new file mode 100644 index 0000000..6fc0c76 --- /dev/null +++ b/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch @@ -0,0 +1,82 @@ +From 60bf942a58db12c821f2a6a49e2e0b04b99bec30 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:42 -0300 +Subject: [PATCH 15/18] QIOChannelSocket: Introduce assert and reduce ifdefs to + improve readability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [9/11] eaa02d68301852ccc98bdacc7387d8d03be1cb05 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav 
Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +During implementation of MSG_ZEROCOPY feature, a lot of #ifdefs were +introduced, particularly at qio_channel_socket_writev(). + +Rewrite some of those changes so it's easier to read. + +Also, introduce an assert to help detect incorrect zero-copy usage when +it's disabled on build. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert + dgilbert: Fixed up thinko'd g_assert_unreachable->g_assert_not_reached +(cherry picked from commit 803ca43e4c7fcf32f9f68c118301ccd0c83ece3f) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index fbd2214d20..7490e5943d 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -579,11 +579,17 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } + +-#ifdef QEMU_MSG_ZEROCOPY + if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++#ifdef QEMU_MSG_ZEROCOPY + sflags = MSG_ZEROCOPY; +- } ++#else ++ /* ++ * We expect QIOChannel class entry point to have ++ * blocked this code path already ++ */ ++ g_assert_not_reached(); + #endif ++ } + + retry: + ret = sendmsg(sioc->fd, &msg, sflags); +@@ -593,15 +599,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + return QIO_CHANNEL_ERR_BLOCK; + case EINTR: + goto retry; +-#ifdef QEMU_MSG_ZEROCOPY + case ENOBUFS: +- if (sflags & MSG_ZEROCOPY) { ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { + error_setg_errno(errp, errno, + "Process can't lock enough memory for using MSG_ZEROCOPY"); + return -1; + } + break; +-#endif + } + + error_setg_errno(errp, errno, +-- +2.35.3 + diff --git a/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch 
b/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch new file mode 100644 index 0000000..27cc557 --- /dev/null +++ b/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch @@ -0,0 +1,107 @@ +From e0e4f01c6f4fb5881960f72ae4e80951b711131e Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 24 Mar 2022 16:04:57 +0100 +Subject: [PATCH 1/5] RHEL: disable "seqpacket" for "vhost-vsock-device" in + rhel8.6.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefano Garzarella +RH-MergeRequest: 136: RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 [rhel-8.7.0] +RH-Commit: [1/1] d82ea09e123679521503689f7d9af1c03dc71bfc +RH-Bugzilla: 2068202 +RH-Acked-by: Jason Wang +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Stefan Hajnoczi + +vhost-vsock device in RHEL 8 kernels doesn't support seqpacket. +To avoid problems when migrating a VM from RHEL 9 host, we need to +disable it in rhel8-* machine types. + +Signed-off-by: Stefano Garzarella +--- + hw/core/machine.c | 10 ++++++++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 2 ++ + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 3 +++ + 5 files changed, 18 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 024b025fc2..76fcabec7a 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,16 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" + ++GlobalProperty hw_compat_rhel_8_6[] = { ++ /* hw_compat_rhel_8_6 bz 2068202 */ ++ /* ++ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so ++ * we need do disable it downstream on the latest hw_compat_rhel_8. 
++ */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++}; ++const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index f03a8f0db8..ab6d03e07a 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -998,6 +998,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_5, + hw_compat_rhel_8_5_len); + compat_props_add(m->compat_props, pc_rhel_8_5_compat, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 5559261d9e..882fe7a68d 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -658,6 +658,8 @@ static void pc_q35_machine_rhel860_options(MachineClass *m) + m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.6.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); + } + + DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 9795eb9406..bec270598b 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1109,6 +1109,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) + + static void ccw_machine_rhel860_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); + } + DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", true); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 04e8759815..4ddb798144 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -443,6 +443,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + 
extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_6[]; ++extern const size_t hw_compat_rhel_8_6_len; ++ + extern GlobalProperty hw_compat_rhel_8_5[]; + extern const size_t hw_compat_rhel_8_5_len; + +-- +2.27.0 + diff --git a/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch b/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch new file mode 100644 index 0000000..8a0aeb0 --- /dev/null +++ b/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch @@ -0,0 +1,237 @@ +From 055edf068196622a3e1868c9e4c991d410272a6d Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Wed, 15 Jun 2022 15:28:27 +0200 +Subject: [PATCH 03/18] RHEL-only: AArch64: Drop unsupported CPU types +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [3/6] 21f54c86dc87e5e75a64459b5a385686bc09640c (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 +Upstream Status: RHEL only + +We only need to support AArch64 cpu types and we only need three +types: + 1) A base type to use with TCG, i.e. a cpu type with only base + features. 'cortex-a57' serves this role and is currently used + by libguestfs. + 2) The 'max' type, which is for both KVM and TCG and is good for + tests that just specify 'max' but run under both. 'max' with + TCG also provides the VM with all the CPU features TCG + supports, which is good for VMs that need features not + provided by the basic cortex-a57. + 3) The host type which is used with KVM. 
+ +Signed-off-by: Andrew Jones +--- + hw/arm/virt.c | 4 ++++ + target/arm/cpu64.c | 6 ++++++ + target/arm/cpu_tcg.c | 12 ++---------- + tests/qtest/arm-cpu-features.c | 6 ++++++ + 4 files changed, 18 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 95d012d6eb..74119976d3 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -239,12 +239,16 @@ static const int a15irqmap[] = { + }; + + static const char *valid_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + ARM_CPU_TYPE_NAME("cortex-a7"), + ARM_CPU_TYPE_NAME("cortex-a15"), + ARM_CPU_TYPE_NAME("cortex-a53"), ++#endif /* disabled for RHEL */ + ARM_CPU_TYPE_NAME("cortex-a57"), ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + ARM_CPU_TYPE_NAME("cortex-a72"), + ARM_CPU_TYPE_NAME("a64fx"), ++#endif /* disabled for RHEL */ + ARM_CPU_TYPE_NAME("host"), + ARM_CPU_TYPE_NAME("max"), + }; +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index eb44c05822..e80b831073 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -146,6 +146,7 @@ static void aarch64_a57_initfn(Object *obj) + define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void aarch64_a53_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -249,6 +250,7 @@ static void aarch64_a72_initfn(Object *obj) + cpu->gic_vprebits = 5; + define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); + } ++#endif /* disabled for RHEL */ + + void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) + { +@@ -923,6 +925,7 @@ static void aarch64_max_initfn(Object *obj) + qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void aarch64_a64fx_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -969,12 +972,15 @@ static void aarch64_a64fx_initfn(Object *obj) + + /* TODO: Add A64FX specific HPC extension registers */ + } ++#endif /* disabled for RHEL */ + + static const 
ARMCPUInfo aarch64_cpus[] = { + { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, + { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, + { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, ++#endif /* disabled for RHEL */ + { .name = "max", .initfn = aarch64_max_initfn }, + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + { .name = "host", .initfn = aarch64_host_initfn }, +diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c +index 3826fa5122..74727fc92c 100644 +--- a/target/arm/cpu_tcg.c ++++ b/target/arm/cpu_tcg.c +@@ -19,10 +19,10 @@ + #include "hw/boards.h" + #endif + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* CPU models. These are not needed for the AArch64 linux-user build. */ + #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) + static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + { +@@ -376,7 +376,6 @@ static void cortex_a9_initfn(Object *obj) + cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. 
*/ + define_arm_cp_regs(cpu, cortexa9_cp_reginfo); + } +-#endif /* disabled for RHEL */ + + #ifndef CONFIG_USER_ONLY + static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) +@@ -402,7 +401,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { + REGINFO_SENTINEL + }; + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_a7_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -448,7 +446,6 @@ static void cortex_a7_initfn(Object *obj) + cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ + } +-#endif /* disabled for RHEL */ + + static void cortex_a15_initfn(Object *obj) + { +@@ -492,7 +489,6 @@ static void cortex_a15_initfn(Object *obj) + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_m0_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -933,7 +929,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) + + cc->gdb_core_xml_file = "arm-m-profile.xml"; + } +-#endif /* disabled for RHEL */ + + #ifndef TARGET_AARCH64 + /* +@@ -1013,7 +1008,6 @@ static void arm_max_initfn(Object *obj) + #endif /* !TARGET_AARCH64 */ + + static const ARMCPUInfo arm_tcg_cpus[] = { +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "arm926", .initfn = arm926_initfn }, + { .name = "arm946", .initfn = arm946_initfn }, + { .name = "arm1026", .initfn = arm1026_initfn }, +@@ -1029,9 +1023,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "cortex-a7", .initfn = cortex_a7_initfn }, + { .name = "cortex-a8", .initfn = cortex_a8_initfn }, + { .name = "cortex-a9", .initfn = cortex_a9_initfn }, +-#endif /* disabled for RHEL */ + { .name = "cortex-a15", .initfn = cortex_a15_initfn }, +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-m0", .initfn = cortex_m0_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m3", .initfn = 
cortex_m3_initfn, +@@ -1062,7 +1054,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, +-#endif /* disabled for RHEL */ + #ifndef TARGET_AARCH64 + { .name = "max", .initfn = arm_max_initfn }, + #endif +@@ -1090,3 +1081,4 @@ static void arm_tcg_cpu_register_types(void) + type_init(arm_tcg_cpu_register_types) + + #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ ++#endif /* disabled for RHEL */ +diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c +index f76652143a..fe2a0a070d 100644 +--- a/tests/qtest/arm-cpu-features.c ++++ b/tests/qtest/arm-cpu-features.c +@@ -440,8 +440,10 @@ static void test_query_cpu_model_expansion(const void *data) + assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); + + /* Test expected feature presence/absence for some cpu types */ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_has_feature_enabled(qts, "cortex-a15", "pmu"); + assert_has_not_feature(qts, "cortex-a15", "aarch64"); ++#endif /* disabled for RHEL */ + + /* Enabling and disabling pmu should always work. 
*/ + assert_has_feature_enabled(qts, "max", "pmu"); +@@ -458,6 +460,7 @@ static void test_query_cpu_model_expansion(const void *data) + assert_has_feature_enabled(qts, "cortex-a57", "pmu"); + assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_has_feature_enabled(qts, "a64fx", "pmu"); + assert_has_feature_enabled(qts, "a64fx", "aarch64"); + /* +@@ -470,6 +473,7 @@ static void test_query_cpu_model_expansion(const void *data) + "{ 'sve384': true }"); + assert_error(qts, "a64fx", "cannot enable sve640", + "{ 'sve640': true }"); ++#endif /* disabled for RHEL */ + + sve_tests_default(qts, "max"); + pauth_tests_default(qts, "max"); +@@ -505,9 +509,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) + QDict *resp; + char *error; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_error(qts, "cortex-a15", + "We cannot guarantee the CPU type 'cortex-a15' works " + "with KVM on this host", NULL); ++#endif /* disabled for RHEL */ + + assert_has_feature_enabled(qts, "host", "aarch64"); + +-- +2.35.3 + diff --git a/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch b/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch new file mode 100644 index 0000000..a1cc4c7 --- /dev/null +++ b/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch @@ -0,0 +1,95 @@ +From d710394f68eb0b6116dd8ac76f619c192e0d5972 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Wed, 15 Jun 2022 15:28:27 +0200 +Subject: [PATCH 02/18] RHEL-only: tests/avocado: Switch aarch64 tests from a53 + to a57 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. 
Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [2/6] e85ef69b42c411a6997e4da10ba05176368769b3 (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 +Upstream Status: RHEL only + +We plan to remove the cortex-a53 from the supported cpu types. Switch +all avocado tests that use it to the cortex-a57, which will work the +same and we intend to keep. We don't want to try and upstream this +change since the better upstream change would be to switch from the +a53 to 'max', but the upstream tests also need to use later guest +kernels to use 'max' (see qemu upstream commit 0942820408dc +("hw/arm/virt: Disable LPA2 for -machine virt-6.2") + +Signed-off-by: Andrew Jones +--- + tests/avocado/replay_kernel.py | 2 +- + tests/avocado/reverse_debugging.py | 2 +- + tests/avocado/tcg_plugins.py | 6 +++--- + 3 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py +index 0b2b0dc692..3a7b5f0748 100644 +--- a/tests/avocado/replay_kernel.py ++++ b/tests/avocado/replay_kernel.py +@@ -147,7 +147,7 @@ def test_aarch64_virt(self): + """ + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' + '/linux/releases/29/Everything/aarch64/os/images/pxeboot' +diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py +index d2921e70c3..66d185ed42 100644 +--- a/tests/avocado/reverse_debugging.py ++++ b/tests/avocado/reverse_debugging.py +@@ -198,7 +198,7 @@ def test_aarch64_virt(self): + """ + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_url = 
('https://archives.fedoraproject.org/pub/archive/fedora' + '/linux/releases/29/Everything/aarch64/os/images/pxeboot' +diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py +index 642d2e49e3..93b3afd823 100644 +--- a/tests/avocado/tcg_plugins.py ++++ b/tests/avocado/tcg_plugins.py +@@ -68,7 +68,7 @@ def test_aarch64_virt_insn(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +@@ -94,7 +94,7 @@ def test_aarch64_virt_insn_icount(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +@@ -120,7 +120,7 @@ def test_aarch64_virt_mem_icount(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +-- +2.35.3 + diff --git a/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch b/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch new file mode 100644 index 0000000..7740d0b --- /dev/null +++ b/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch @@ -0,0 +1,58 @@ +From 5ab8613582fd56b847fe75750acb5b7255900b35 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Thu, 9 Jun 2022 11:55:15 +0200 +Subject: [PATCH 15/16] Revert "globally limit the maximum number of CPUs" + +RH-Author: Vitaly Kuznetsov +RH-MergeRequest: 99: Revert "globally limit the maximum number of CPUs" +RH-Commit: [1/1] 13100d4a2209b2190a3654c1f9cf4ebade1e8d24 (vkuznets/qemu-kvm-c9s) +RH-Bugzilla: 2094270 +RH-Acked-by: Andrew Jones 
+RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094270 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871149 +Upstream Status: RHEL-only +Tested: with upstream kernel + +Downstream QEMU carries a patch that sets the hard limit of possible vCPUs +to the value that the KVM code of the kernel recommends as soft limit. +Upstream KVM code has been changed recently to not use an arbitrary soft +limit anymore, but to cap the value on the amount of available physical +CPUs of the host. This defeats the purpose of the downstream change in +QEMU completely. Drop the downstream-only patch to allow CPU overcommit. + +This reverts commit 6669f6fa677d43144f39d6ad59725b7ba622f1c2. + +Signed-off-by: Vitaly Kuznetsov +--- + accel/kvm/kvm-all.c | 12 ------------ + 1 file changed, 12 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index fdf0e4d429..5f1377ca04 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2430,18 +2430,6 @@ static int kvm_init(MachineState *ms) + soft_vcpus_limit = kvm_recommended_vcpus(s); + hard_vcpus_limit = kvm_max_vcpus(s); + +-#ifdef HOST_PPC64 +- /* +- * On POWER, the kernel advertises a soft limit based on the +- * number of CPU threads on the host. We want to allow exceeding +- * this for testing purposes, so we don't want to set hard limit +- * to soft limit as on x86. 
+- */ +-#else +- /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ +- hard_vcpus_limit = soft_vcpus_limit; +-#endif +- + while (nc->name) { + if (nc->num > soft_vcpus_limit) { + warn_report("Number of %s cpus requested (%d) exceeds " +-- +2.31.1 + diff --git a/kvm-Revert-migration-Simplify-unqueue_page.patch b/kvm-Revert-migration-Simplify-unqueue_page.patch new file mode 100644 index 0000000..f5c97f6 --- /dev/null +++ b/kvm-Revert-migration-Simplify-unqueue_page.patch @@ -0,0 +1,134 @@ +From 5ea59b17866add54e5ae8c76d3cb472c67e1fa91 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 2 Aug 2022 08:19:49 +0200 +Subject: [PATCH 32/32] Revert "migration: Simplify unqueue_page()" + +RH-Author: Thomas Huth +RH-MergeRequest: 112: Fix postcopy migration on s390x +RH-Commit: [2/2] 3913c9ed3f27f4b66245913da29d0c46db0c6567 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2099934 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Peter Xu + +This reverts commit cfd66f30fb0f735df06ff4220e5000290a43dad3. + +The simplification of unqueue_page() introduced a bug that sometimes +breaks migration on s390x hosts. + +The problem is not fully understood yet, but since we are already in +the freeze for QEMU 7.1 and we need something working there, let's +revert this patch for the upcoming release. The optimization can be +redone later again in a proper way if necessary. + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2099934 +Signed-off-by: Thomas Huth +Message-Id: <20220802061949.331576-1-thuth@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 777f53c75983dd10756f5dbfc8af50fe11da81c1) +Conflicts: + migration/trace-events + (trivial contextual conflict) +Signed-off-by: Thomas Huth +--- + migration/ram.c | 37 ++++++++++++++++++++++++++----------- + migration/trace-events | 3 ++- + 2 files changed, 28 insertions(+), 12 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index fb6db54642..ee40e4a718 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1548,7 +1548,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) + { + struct RAMSrcPageRequest *entry; + RAMBlock *block = NULL; +- size_t page_size; + + if (!postcopy_has_request(rs)) { + return NULL; +@@ -1565,13 +1564,10 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) + entry = QSIMPLEQ_FIRST(&rs->src_page_requests); + block = entry->rb; + *offset = entry->offset; +- page_size = qemu_ram_pagesize(block); +- /* Each page request should only be multiple page size of the ramblock */ +- assert((entry->len % page_size) == 0); + +- if (entry->len > page_size) { +- entry->len -= page_size; +- entry->offset += page_size; ++ if (entry->len > TARGET_PAGE_SIZE) { ++ entry->len -= TARGET_PAGE_SIZE; ++ entry->offset += TARGET_PAGE_SIZE; + } else { + memory_region_unref(block->mr); + QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); +@@ -1579,9 +1575,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) + migration_consume_urgent_request(); + } + +- trace_unqueue_page(block->idstr, *offset, +- test_bit((*offset >> TARGET_PAGE_BITS), block->bmap)); +- + return block; + } + +@@ -1956,8 +1949,30 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) + { + RAMBlock *block; + ram_addr_t offset; ++ bool dirty; ++ ++ do { ++ block = unqueue_page(rs, &offset); ++ /* ++ * We're sending this page, and since it's postcopy nothing else ++ * will dirty it, and we must make sure it doesn't get sent again ++ * even if this queue request was received 
after the background ++ * search already sent it. ++ */ ++ if (block) { ++ unsigned long page; ++ ++ page = offset >> TARGET_PAGE_BITS; ++ dirty = test_bit(page, block->bmap); ++ if (!dirty) { ++ trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset, ++ page); ++ } else { ++ trace_get_queued_page(block->idstr, (uint64_t)offset, page); ++ } ++ } + +- block = unqueue_page(rs, &offset); ++ } while (block && !dirty); + + if (!block) { + /* +diff --git a/migration/trace-events b/migration/trace-events +index 1aec580e92..09d61ed1f4 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -85,6 +85,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)" + qemu_file_fclose(void) "" + + # ram.c ++get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" ++get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" + migration_bitmap_sync_start(void) "" + migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 + migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx" +@@ -110,7 +112,6 @@ ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRI + ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64 + ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" + ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" +-unqueue_page(char *block, uint64_t offset, bool dirty) "ramblock '%s' offset 0x%"PRIx64" dirty %d" + + # multifd.c + multifd_new_send_channel_async(uint8_t id) "channel %u" +-- +2.31.1 + diff --git a/kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch 
b/kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch new file mode 100644 index 0000000..56af50f --- /dev/null +++ b/kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch @@ -0,0 +1,93 @@ +From e626dc16d130c724c400b99a93daad0a9abeae59 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 22 Mar 2022 19:23:36 -0400 +Subject: [PATCH 01/18] Revert "redhat: Add hw_compat_4_2_extra and apply to + upstream machines" + +RH-Author: Jon Maloy +RH-MergeRequest: 131: Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines" +RH-Commit: [1/3] 47b7d9e5062f5e215d5ed1a3ecdc1a87ac3fa630 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062613 +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +BZ: https://bugzilla.redhat.com/2062613 +UPSTREAM: no +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038000 + +commit dc2e9ec1e014950c7918e23a3e9b0096b34a4a92 +Author: Dr. David Alan Gilbert +Date: Wed Mar 9 10:31:53 2022 +0000 + + Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines" + + This reverts commit 66882f9a3230246409f3918424aca26add5c034a. + We no longer need these compat machines it was added for. + + Signed-off-by: Dr. David Alan Gilbert + +(cherry picked from commit dc2e9ec1e014950c7918e23a3e9b0096b34a4a92) +Signed-off-by: Jon Maloy +--- + hw/i386/pc.c | 12 ------------ + hw/i386/pc_piix.c | 6 ------ + include/hw/i386/pc.h | 3 --- + 3 files changed, 21 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 4c08a1971c..357257349b 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -670,18 +670,6 @@ GlobalProperty pc_rhel_7_0_compat[] = { + }; + const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat); + +-/* +- * RHEL: These properties only apply to the RHEL exported machine types +- * pc-4.2/2.11 for the purpose to have a limited upstream machines support +- * which can be migrated to RHEL. 
Let's avoid touching hw_compat_4_2 directly +- * so that we can have some isolation against the upstream code. +- */ +-GlobalProperty hw_compat_4_2_extra[] = { +- /* By default enlarge the default virtio-net-pci ROM to 512KB. */ +- { "virtio-net-pci", "romsize", "0x80000" }, +-}; +-const size_t hw_compat_4_2_extra_len = G_N_ELEMENTS(hw_compat_4_2_extra); +- + GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) + { + GSIState *s; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index c30057c443..7b7076cbc7 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -531,12 +531,6 @@ static void pc_i440fx_4_2_machine_options(MachineClass *m) + * supported by RHEL, even if exported. + */ + m->deprecation_reason = "Not supported by RHEL"; +- /* +- * RHEL: Specific compat properties to have limited support for upstream +- * machines exported. +- */ +- compat_props_add(m->compat_props, hw_compat_4_2_extra, +- hw_compat_4_2_extra_len); + } + + /* RHEL: Export pc-4.2 */ +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 9e8bfb69f8..4a593acb50 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -325,9 +325,6 @@ extern const size_t pc_rhel_7_1_compat_len; + extern GlobalProperty pc_rhel_7_0_compat[]; + extern const size_t pc_rhel_7_0_compat_len; + +-extern GlobalProperty hw_compat_4_2_extra[]; +-extern const size_t hw_compat_4_2_extra_len; +- + /* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. 
+ */ +-- +2.27.0 + diff --git a/kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch b/kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch new file mode 100644 index 0000000..1b2051a --- /dev/null +++ b/kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch @@ -0,0 +1,53 @@ +From 5bf8f1d69fea1225e927fbb3efe549a2a9d47d92 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 22 Mar 2022 19:23:36 -0400 +Subject: [PATCH 02/18] Revert "redhat: Enable FDC device for upstream machines + too" + +RH-Author: Jon Maloy +RH-MergeRequest: 131: Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines" +RH-Commit: [2/3] 4e3c945e3de9bb9d9a6d24115f0719168c9669fe (jmaloy/qemu-kvm) +RH-Bugzilla: 2062613 +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +BZ: https://bugzilla.redhat.com/2062613 +UPSTREAM: no +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038000 + +commit 597cb6ca1da4a3eea77c1e4928f55203a1d5c70c +Author: Dr. David Alan Gilbert +Date: Wed Mar 9 10:32:39 2022 +0000 + + Revert "redhat: Enable FDC device for upstream machines too" + + This reverts commit c4d1aa8bf21fe98da94a9cff30b7c25bed12c17f. + We no longer need these compat machines it was added for. + + Signed-off-by: Dr. 
David Alan Gilbert + +(cherry picked from commit 597cb6ca1da4a3eea77c1e4928f55203a1d5c70c) +Signed-off-by: Jon Maloy +--- + hw/block/fdc.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 63042ef030..97fa6de423 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -2341,10 +2341,7 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) + + /* Restricted for Red Hat Enterprise Linux: */ + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- if (!strstr(mc->name, "-rhel7.") && +- /* Exported two upstream machine types allows FDC too */ +- strcmp(mc->name, "pc-i440fx-4.2") && +- strcmp(mc->name, "pc-i440fx-2.11")) { ++ if (!strstr(mc->name, "-rhel7.")) { + error_setg(errp, "Device %s is not supported with machine type %s", + object_get_typename(OBJECT(dev)), mc->name); + return; +-- +2.27.0 + diff --git a/kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch b/kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch new file mode 100644 index 0000000..27e3dc9 --- /dev/null +++ b/kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch @@ -0,0 +1,191 @@ +From ee3cae3bb349469edcf725a1c5161521e95dcb9f Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 22 Mar 2022 19:23:36 -0400 +Subject: [PATCH 03/18] Revert "redhat: Expose upstream machines pc-4.2 and + pc-2.11" + +RH-Author: Jon Maloy +RH-MergeRequest: 131: Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines" +RH-Commit: [3/3] 35cee68034580f81b3aa916921eecd2fdfa7dd15 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062613 +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +BZ: https://bugzilla.redhat.com/2062613 +UPSTREAM: no +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038000 + +commit f3b50d6d4ae0be9e64aafe6a15f5423bab4899e9 +Author: Dr. 
David Alan Gilbert +Date: Wed Mar 9 10:34:58 2022 +0000 + + Revert "redhat: Expose upstream machines pc-4.2 and pc-2.11" + This reverts commit 618e2424edba499d52cd26cf8363bc2dd85ef149. + We no longer need these compat machines. + + Signed-off-by: Dr. David Alan Gilbert + +(cherry picked from commit f3b50d6d4ae0be9e64aafe6a15f5423bab4899e9) +Signed-off-by: Jon Maloy +--- + hw/i386/pc_piix.c | 37 ------------------------------------- + 1 file changed, 37 deletions(-) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 7b7076cbc7..f03a8f0db8 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -315,14 +315,6 @@ static void pc_init1(MachineState *machine, + * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). + */ + +-/* +- * NOTE! Not all the upstream machine types are disabled for RHEL. For +- * providing a very limited support for upstream machine types, pc machines +- * 2.11 and 4.2 are exposed explicitly. This will make the below "#if" macros +- * a bit messed up, but please read this comment first so that we can have a +- * rough understanding of what we're going to do. 
+- */ +- + #if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_compat_2_3_fn(MachineState *machine) + { +@@ -399,8 +391,6 @@ static void pc_xen_hvm_init(MachineState *machine) + } + #endif + +-#endif /* Disabled for Red Hat Enterprise Linux */ +- + #define DEFINE_I440FX_MACHINE(suffix, name, compatfn, optionfn) \ + static void pc_init_##suffix(MachineState *machine) \ + { \ +@@ -465,10 +455,8 @@ static void pc_i440fx_6_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v6_0, "pc-i440fx-6.0", NULL, + pc_i440fx_6_0_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_2_machine_options(MachineClass *m) + { +@@ -479,10 +467,8 @@ static void pc_i440fx_5_2_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_5_2, pc_compat_5_2_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_2, "pc-i440fx-5.2", NULL, + pc_i440fx_5_2_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_1_machine_options(MachineClass *m) + { +@@ -497,10 +483,8 @@ static void pc_i440fx_5_1_machine_options(MachineClass *m) + pcmc->pci_root_uid = 1; + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_1, "pc-i440fx-5.1", NULL, + pc_i440fx_5_1_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_0_machine_options(MachineClass *m) + { +@@ -513,10 +497,8 @@ static void pc_i440fx_5_0_machine_options(MachineClass *m) + m->auto_enable_numa_with_memdev = false; + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_0, "pc-i440fx-5.0", NULL, + pc_i440fx_5_0_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_4_2_machine_options(MachineClass *m) + { +@@ -525,15 
+507,8 @@ static void pc_i440fx_4_2_machine_options(MachineClass *m) + m->is_default = false; + compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len); + compat_props_add(m->compat_props, pc_compat_4_2, pc_compat_4_2_len); +- +- /* +- * RHEL: Mark all upstream machines as deprecated because they're not +- * supported by RHEL, even if exported. +- */ +- m->deprecation_reason = "Not supported by RHEL"; + } + +-/* RHEL: Export pc-4.2 */ + DEFINE_I440FX_MACHINE(v4_2, "pc-i440fx-4.2", NULL, + pc_i440fx_4_2_machine_options); + +@@ -546,10 +521,8 @@ static void pc_i440fx_4_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_4_1, pc_compat_4_1_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1", NULL, + pc_i440fx_4_1_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_4_0_machine_options(MachineClass *m) + { +@@ -562,10 +535,8 @@ static void pc_i440fx_4_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0", NULL, + pc_i440fx_4_0_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_3_1_machine_options(MachineClass *m) + { +@@ -581,10 +552,8 @@ static void pc_i440fx_3_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_3_1, pc_compat_3_1_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v3_1, "pc-i440fx-3.1", NULL, + pc_i440fx_3_1_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_3_0_machine_options(MachineClass *m) + { +@@ -593,10 +562,8 @@ static void pc_i440fx_3_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_3_0, pc_compat_3_0_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux 
*/ + DEFINE_I440FX_MACHINE(v3_0, "pc-i440fx-3.0", NULL, + pc_i440fx_3_0_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_2_12_machine_options(MachineClass *m) + { +@@ -605,10 +572,8 @@ static void pc_i440fx_2_12_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_2_12, pc_compat_2_12_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v2_12, "pc-i440fx-2.12", NULL, + pc_i440fx_2_12_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_2_11_machine_options(MachineClass *m) + { +@@ -617,11 +582,9 @@ static void pc_i440fx_2_11_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_2_11, pc_compat_2_11_len); + } + +-/* RHEL: Export pc-2.11 */ + DEFINE_I440FX_MACHINE(v2_11, "pc-i440fx-2.11", NULL, + pc_i440fx_2_11_machine_options); + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_i440fx_2_10_machine_options(MachineClass *m) + { + pc_i440fx_2_11_machine_options(m); +-- +2.27.0 + diff --git a/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch b/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch new file mode 100644 index 0000000..e8eb35d --- /dev/null +++ b/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch @@ -0,0 +1,51 @@ +From 733acef2caea0758edd74fb634b095ce09bf5914 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 9 May 2022 03:46:23 -0400 +Subject: [PATCH 15/16] Revert "virtio-scsi: Reject scsi-cd if data plane + enabled [RHEL only]" + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 91: Revert "virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only]" +RH-Commit: [1/1] 1af55d792bc9166e5c86272afe8093c76ab41bb4 (eesposit/qemu-kvm) +RH-Bugzilla: 1995710 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi + +This reverts commit 4e17b1126e. 
+ +Over time AioContext usage and coverage has increased, and now block +backend is capable of handling AioContext change upon eject and insert. +Therefore the above downstream-only commit is not necessary anymore, +and can be safely reverted. + +X-downstream-only: true + +Signed-off-by: Emanuele Giuseppe Esposito +--- + hw/scsi/virtio-scsi.c | 9 --------- + 1 file changed, 9 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 2450c9438c..db54d104be 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -937,15 +937,6 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + AioContext *old_context; + int ret; + +- /* XXX: Remove this check once block backend is capable of handling +- * AioContext change upon eject/insert. +- * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if +- * data plane is not used, both cases are safe for scsi-cd. */ +- if (s->ctx && s->ctx != qemu_get_aio_context() && +- object_dynamic_cast(OBJECT(dev), "scsi-cd")) { +- error_setg(errp, "scsi-cd is not supported by data plane"); +- return; +- } + if (s->ctx && !s->dataplane_fenced) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; +-- +2.31.1 + diff --git a/kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch b/kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch new file mode 100644 index 0000000..9d2594f --- /dev/null +++ b/kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch @@ -0,0 +1,78 @@ +From af082f3499de265d123157d097b5c84981e0aa63 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 15/18] acpi: fix OEM ID/OEM Table ID padding + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [7/10] 51ea859cbe12b5a902d529ab589d18757d98f71d (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + 
+commit 748c030f360a940fe0c9382c8ca1649096c3a80d +Author: Igor Mammedov +Date: Wed Jan 12 08:03:31 2022 -0500 + + acpi: fix OEM ID/OEM Table ID padding + + Commit [2] broke original '\0' padding of OEM ID and OEM Table ID + fields in headers of ACPI tables. While it doesn't have impact on + default values since QEMU uses 6 and 8 characters long values + respectively, it broke usecase where IDs are provided on QEMU CLI. + It shouldn't affect guest (but may cause licensing verification + issues in guest OS). + One of the broken usecases is user supplied SLIC table with IDs + shorter than max possible length, where [2] mangles IDs with extra + spaces in RSDT and FADT tables whereas guest OS expects those to + mirror the respective values of the used SLIC table. + + Fix it by replacing whitespace padding with '\0' padding in + accordance with [1] and expectations of guest OS + + 1) ACPI spec, v2.0b + 17.2 AML Grammar Definition + ... + //OEM ID of up to 6 characters. If the OEM ID is + //shorter than 6 characters, it can be terminated + //with a NULL character. + + 2) + Fixes: 602b458201 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/707 + Reported-by: Dmitry V. Orekhov + Signed-off-by: Igor Mammedov + Cc: qemu-stable@nongnu.org + Message-Id: <20220112130332.1648664-4-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + Reviewed-by: Ani Sinha + Tested-by: Dmitry V. 
Orekhov dima.orekhov@gmail.com + +(cherry picked from commit 748c030f360a940fe0c9382c8ca1649096c3a80d) +Signed-off-by: Jon Maloy +--- + hw/acpi/aml-build.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index b3b3310df3..65148d5b9d 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -1724,9 +1724,9 @@ void acpi_table_begin(AcpiTable *desc, GArray *array) + build_append_int_noprefix(array, 0, 4); /* Length */ + build_append_int_noprefix(array, desc->rev, 1); /* Revision */ + build_append_int_noprefix(array, 0, 1); /* Checksum */ +- build_append_padded_str(array, desc->oem_id, 6, ' '); /* OEMID */ ++ build_append_padded_str(array, desc->oem_id, 6, '\0'); /* OEMID */ + /* OEM Table ID */ +- build_append_padded_str(array, desc->oem_table_id, 8, ' '); ++ build_append_padded_str(array, desc->oem_table_id, 8, '\0'); + build_append_int_noprefix(array, 1, 4); /* OEM Revision */ + g_array_append_vals(array, ACPI_BUILD_APPNAME8, 4); /* Creator ID */ + build_append_int_noprefix(array, 1, 4); /* Creator Revision */ +-- +2.27.0 + diff --git a/kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch b/kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch new file mode 100644 index 0000000..a6b1151 --- /dev/null +++ b/kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch @@ -0,0 +1,108 @@ +From 4e8fb957a349558648d5cddb80a89460bc97439e Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 09/18] acpi: fix QEMU crash when started with SLIC table +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [1/10] 0c34e80346c33da4f220d9c486b120c35005144e (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + 
+commit 8cdb99af45365727ac17f45239a9b8c1d5155c6d +Author: Igor Mammedov +Date: Mon Dec 27 14:31:17 2021 -0500 + + acpi: fix QEMU crash when started with SLIC table + + if QEMU is started with user provided SLIC table blob, + + -acpitable sig=SLIC,oem_id='CRASH ',oem_table_id="ME",oem_rev=00002210,asl_compiler_id="",asl_compiler_rev=00000000,data=/dev/null + it will assert with: + + hw/acpi/aml-build.c:61:build_append_padded_str: assertion failed: (len <= maxlen) + + and following backtrace: + + ... + build_append_padded_str (array=0x555556afe320, str=0x555556afdb2e "CRASH ME", maxlen=0x6, pad=0x20) at hw/acpi/aml-build.c:61 + acpi_table_begin (desc=0x7fffffffd1b0, array=0x555556afe320) at hw/acpi/aml-build.c:1727 + build_fadt (tbl=0x555556afe320, linker=0x555557ca3830, f=0x7fffffffd318, oem_id=0x555556afdb2e "CRASH ME", oem_table_id=0x555556afdb34 "ME") at hw/acpi/aml-build.c:2064 + ... + + which happens due to acpi_table_begin() expecting NULL terminated + oem_id and oem_table_id strings, which is normally the case, but + in case of user provided SLIC table, oem_id points to table's blob + directly and as result oem_id became longer than expected. + + Fix issue by handling oem_id consistently and make acpi_get_slic_oem() + return NULL terminated strings. + + PS: + After [1] refactoring, oem_id semantics became inconsistent, where + NULL terminated string was coming from machine and old way pointer + into byte array coming from -acpitable option. That used to work + since build_header() wasn't expecting NULL terminated string and + blindly copied the 1st 6 bytes only. + + However commit [2] broke that by replacing build_header() with + acpi_table_begin(), which was expecting NULL terminated string + and was checking oem_id size. 
+ + 1) 602b45820 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + 2) + Fixes: 4b56e1e4eb08 ("acpi: build_fadt: use acpi_table_begin()/acpi_table_end() instead of build_header()") + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/786 + Signed-off-by: Igor Mammedov + Message-Id: <20211227193120.1084176-2-imammedo@redhat.com> + Reviewed-by: Philippe Mathieu-Daudé + Tested-by: Denis Lisov + Tested-by: Alexander Tsoy + Cc: qemu-stable@nongnu.org + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit 8cdb99af45365727ac17f45239a9b8c1d5155c6d) +Signed-off-by: Jon Maloy +--- + hw/acpi/core.c | 4 ++-- + hw/i386/acpi-build.c | 2 ++ + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/hw/acpi/core.c b/hw/acpi/core.c +index 1e004d0078..3e811bf03c 100644 +--- a/hw/acpi/core.c ++++ b/hw/acpi/core.c +@@ -345,8 +345,8 @@ int acpi_get_slic_oem(AcpiSlicOem *oem) + struct acpi_table_header *hdr = (void *)(u - sizeof(hdr->_length)); + + if (memcmp(hdr->sig, "SLIC", 4) == 0) { +- oem->id = hdr->oem_id; +- oem->table_id = hdr->oem_table_id; ++ oem->id = g_strndup(hdr->oem_id, 6); ++ oem->table_id = g_strndup(hdr->oem_table_id, 8); + return 0; + } + } +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index a4478e77b7..acc4869db0 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -2726,6 +2726,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) + + /* Cleanup memory that's no longer used. 
*/ + g_array_free(table_offsets, true); ++ g_free(slic_oem.id); ++ g_free(slic_oem.table_id); + } + + static void acpi_ram_update(MemoryRegion *mr, GArray *data) +-- +2.27.0 + diff --git a/kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch b/kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch new file mode 100644 index 0000000..2be41b6 --- /dev/null +++ b/kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch @@ -0,0 +1,140 @@ +From c9ceb175667cdeead59384a97a812367ae19c570 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 23 Mar 2022 13:21:40 -0400 +Subject: [PATCH 06/18] acpi: pcihp: pcie: set power on cap on parent slot + +RH-Author: Jon Maloy +RH-MergeRequest: 134: pci: expose TYPE_XIO3130_DOWNSTREAM name +RH-Commit: [2/2] d883872647a6e90ec573140b2c171f3f53b600ab (jmaloy/qemu-kvm) +RH-Bugzilla: 2062610 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Gerd Hoffmann + +BZ: https://bugzilla.redhat.com/2062610 +UPSTREAM: merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038138 + +commit 6b0969f1ec825984cd74619f0730be421b0c46fb +Author: Igor Mammedov +Date: Tue Mar 1 10:11:59 2022 -0500 + + acpi: pcihp: pcie: set power on cap on parent slot + + on creation a PCIDevice has power turned on at the end of pci_qdev_realize() + however later on if PCIe slot isn't populated with any children + it's power is turned off. It's fine if native hotplug is used + as plug callback will power slot on among other things. + However when ACPI hotplug is enabled it replaces native PCIe plug + callbacks with ACPI specific ones (acpi_pcihp_device_*plug_cb) and + as result slot stays powered off. It works fine as ACPI hotplug + on guest side takes care of enumerating/initializing hotplugged + device. 
But when later guest is migrated, call chain introduced by + commit d5daff7d312 (pcie: implement slot power control for pcie root ports) + + pcie_cap_slot_post_load() + -> pcie_cap_update_power() + -> pcie_set_power_device() + -> pci_set_power() + -> pci_update_mappings() + + will disable earlier initialized BARs for the hotplugged device + in powered off slot due to commit 23786d13441 (pci: implement power state) + which disables BARs if power is off. + + Fix it by setting PCI_EXP_SLTCTL_PCC to PCI_EXP_SLTCTL_PWR_ON + on slot (root port/downstream port) at the time a device + hotplugged into it. As result PCI_EXP_SLTCTL_PWR_ON is migrated + to target and above call chain keeps device plugged into it + powered on. + + Fixes: d5daff7d312 ("pcie: implement slot power control for pcie root ports") + Fixes: 23786d13441 ("pci: implement power state") + Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2053584 + Suggested-by: "Michael S. Tsirkin" + Signed-off-by: Igor Mammedov + Message-Id: <20220301151200.3507298-3-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit 6b0969f1ec825984cd74619f0730be421b0c46fb) +Signed-off-by: Jon Maloy +--- + hw/acpi/pcihp.c | 12 +++++++++++- + hw/pci/pcie.c | 11 +++++++++++ + include/hw/pci/pcie.h | 1 + + 3 files changed, 23 insertions(+), 1 deletion(-) + +diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c +index a5e182dd3a..be0e846b34 100644 +--- a/hw/acpi/pcihp.c ++++ b/hw/acpi/pcihp.c +@@ -32,6 +32,7 @@ + #include "hw/pci/pci_bridge.h" + #include "hw/pci/pci_host.h" + #include "hw/pci/pcie_port.h" ++#include "hw/pci-bridge/xio3130_downstream.h" + #include "hw/i386/acpi-build.h" + #include "hw/acpi/acpi.h" + #include "hw/pci/pci_bus.h" +@@ -341,6 +342,8 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, + { + PCIDevice *pdev = PCI_DEVICE(dev); + int slot = PCI_SLOT(pdev->devfn); ++ PCIDevice *bridge; ++ PCIBus *bus; + int bsel; + + /* Don't send event when device is enabled during qemu machine creation: +@@ -370,7 +373,14 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, + return; + } + +- bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev)); ++ bus = pci_get_bus(pdev); ++ bridge = pci_bridge_get_device(bus); ++ if (object_dynamic_cast(OBJECT(bridge), TYPE_PCIE_ROOT_PORT) || ++ object_dynamic_cast(OBJECT(bridge), TYPE_XIO3130_DOWNSTREAM)) { ++ pcie_cap_slot_enable_power(bridge); ++ } ++ ++ bsel = acpi_pcihp_get_bsel(bus); + g_assert(bsel >= 0); + s->acpi_pcihp_pci_status[bsel].up |= (1U << slot); + acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index d7d73a31e4..996f0e24fe 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ -366,6 +366,17 @@ static void hotplug_event_clear(PCIDevice *dev) + } + } + ++void pcie_cap_slot_enable_power(PCIDevice *dev) ++{ ++ uint8_t *exp_cap = dev->config + dev->exp.exp_cap; ++ uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP); ++ ++ if (sltcap & PCI_EXP_SLTCAP_PCP) { ++ pci_set_word_by_mask(exp_cap + 
PCI_EXP_SLTCTL, ++ PCI_EXP_SLTCTL_PCC, PCI_EXP_SLTCTL_PWR_ON); ++ } ++} ++ + static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque) + { + bool *power = opaque; +diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h +index 6063bee0ec..c27368d077 100644 +--- a/include/hw/pci/pcie.h ++++ b/include/hw/pci/pcie.h +@@ -112,6 +112,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int len); + int pcie_cap_slot_post_load(void *opaque, int version_id); + void pcie_cap_slot_push_attention_button(PCIDevice *dev); ++void pcie_cap_slot_enable_power(PCIDevice *dev); + + void pcie_cap_root_init(PCIDevice *dev); + void pcie_cap_root_reset(PCIDevice *dev); +-- +2.27.0 + diff --git a/kvm-acpi-validate-hotplug-selector-on-access.patch b/kvm-acpi-validate-hotplug-selector-on-access.patch new file mode 100644 index 0000000..d18989a --- /dev/null +++ b/kvm-acpi-validate-hotplug-selector-on-access.patch @@ -0,0 +1,51 @@ +From 529a5d908f5d16714b8ae0a51eaaaa84994dfae8 Mon Sep 17 00:00:00 2001 +From: "Michael S. Tsirkin" +Date: Tue, 21 Dec 2021 09:45:44 -0500 +Subject: [PATCH 1/2] acpi: validate hotplug selector on access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 97: acpi: validate hotplug selector on access +RH-Commit: [1/1] 79bcfb0df0091e2b716d2e1c545f047b3409c26c (jmaloy/qemu-kvm) +RH-Bugzilla: 2036580 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Igor Mammedov + +When bus is looked up on a pci write, we didn't +validate that the lookup succeeded. +Fuzzers thus can trigger QEMU crash by dereferencing the NULL +bus pointer. + +Fixes: b32bd763a1 ("pci: introduce acpi-index property for PCI device") +Fixes: CVE-2021-4158 +Cc: "Igor Mammedov" +Fixes: https://gitlab.com/qemu-project/qemu/-/issues/770 +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Ani Sinha +(cherry picked from commit 9bd6565ccee68f72d5012e24646e12a1c662827e) +Signed-off-by: Jon Maloy +--- + hw/acpi/pcihp.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c +index 30405b5113..a5e182dd3a 100644 +--- a/hw/acpi/pcihp.c ++++ b/hw/acpi/pcihp.c +@@ -491,6 +491,9 @@ static void pci_write(void *opaque, hwaddr addr, uint64_t data, + } + + bus = acpi_pcihp_find_hotplug_bus(s, s->hotplug_select); ++ if (!bus) { ++ break; ++ } + QTAILQ_FOREACH_SAFE(kid, &bus->qbus.children, sibling, next) { + Object *o = OBJECT(kid->child); + PCIDevice *dev = PCI_DEVICE(o); +-- +2.27.0 + diff --git a/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch b/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch new file mode 100644 index 0000000..eb0f3cf --- /dev/null +++ b/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch @@ -0,0 +1,63 @@ +From b21fa5ecd9acf2b91839a2915fb4bb39dac4c803 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 3 Feb 2022 15:05:33 +0100 +Subject: [PATCH 2/5] block: Lock AioContext for drain_end in blockdev-reopen + +RH-Author: Kevin Wolf +RH-MergeRequest: 142: block: Lock AioContext for drain_end in blockdev-reopen +RH-Commit: [1/2] 98de3b5987f88ea6b4b503f623d6c4475574e037 +RH-Bugzilla: 2067118 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Reitz + +bdrv_subtree_drained_end() requires the caller to hold the AioContext +lock for the drained node. Not doing this for nodes outside of the main +AioContext leads to crashes when AIO_WAIT_WHILE() needs to wait and +tries to temporarily release the lock. 
+ +Fixes: 3908b7a8994fa5ef7a89aa58cd5a02fc58141592 +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2046659 +Reported-by: Qing Wang +Signed-off-by: Kevin Wolf +Message-Id: <20220203140534.36522-2-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit aba8205be0707b9d108e32254e186ba88107a869) +Signed-off-by: Kevin Wolf +--- + blockdev.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/blockdev.c b/blockdev.c +index b35072644e..565f6a81fd 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3562,6 +3562,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + { + BlockReopenQueue *queue = NULL; + GSList *drained = NULL; ++ GSList *p; + + /* Add each one of the BDS that we want to reopen to the queue */ + for (; reopen_list != NULL; reopen_list = reopen_list->next) { +@@ -3611,7 +3612,15 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + + fail: + bdrv_reopen_queue_free(queue); +- g_slist_free_full(drained, (GDestroyNotify) bdrv_subtree_drained_end); ++ for (p = drained; p; p = p->next) { ++ BlockDriverState *bs = p->data; ++ AioContext *ctx = bdrv_get_aio_context(bs); ++ ++ aio_context_acquire(ctx); ++ bdrv_subtree_drained_end(bs); ++ aio_context_release(ctx); ++ } ++ g_slist_free(drained); + } + + void qmp_blockdev_del(const char *node_name, Error **errp) +-- +2.27.0 + diff --git a/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch b/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch new file mode 100644 index 0000000..52d37d8 --- /dev/null +++ b/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch @@ -0,0 +1,129 @@ +From bf4c15a3debbe68b6eb25c52174843470a9c014f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 11 Jan 2022 15:36:12 +0000 +Subject: [PATCH 3/6] block-backend: prevent dangling BDS pointers across + aio_poll() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 109: block-backend: 
prevent dangling BDS pointers across aio_poll() +RH-Commit: [1/2] da5a59eddff0dc10be7de8e291fa675143d11d73 +RH-Bugzilla: 2021778 2036178 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Kevin Wolf + +The BlockBackend root child can change when aio_poll() is invoked. This +happens when a temporary filter node is removed upon blockjob +completion, for example. + +Functions in block/block-backend.c must be aware of this when using a +blk_bs() pointer across aio_poll() because the BlockDriverState refcnt +may reach 0, resulting in a stale pointer. + +One example is scsi_device_purge_requests(), which calls blk_drain() to +wait for in-flight requests to cancel. If the backup blockjob is active, +then the BlockBackend root child is a temporary filter BDS owned by the +blockjob. The blockjob can complete during bdrv_drained_begin() and the +last reference to the BDS is released when the temporary filter node is +removed. This results in a use-after-free when blk_drain() calls +bdrv_drained_end(bs) on the dangling pointer. + +Explicitly hold a reference to bs across block APIs that invoke +aio_poll(). 
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2021778 +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178 +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220111153613.25453-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 1e3552dbd28359d35967b7c28dc86cde1bc29205) +Signed-off-by: Stefan Hajnoczi +--- + block/block-backend.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 12ef80ea17..23e727199b 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -822,16 +822,22 @@ BlockBackend *blk_by_public(BlockBackendPublic *public) + void blk_remove_bs(BlockBackend *blk) + { + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; +- BlockDriverState *bs; + BdrvChild *root; + + notifier_list_notify(&blk->remove_bs_notifiers, blk); + if (tgm->throttle_state) { +- bs = blk_bs(blk); ++ BlockDriverState *bs = blk_bs(blk); ++ ++ /* ++ * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for ++ * example, if a temporary filter node is removed by a blockjob. 
++ */ ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + throttle_group_detach_aio_context(tgm); + throttle_group_attach_aio_context(tgm, qemu_get_aio_context()); + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + + blk_update_root_state(blk); +@@ -1705,6 +1711,7 @@ void blk_drain(BlockBackend *blk) + BlockDriverState *bs = blk_bs(blk); + + if (bs) { ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + } + +@@ -1714,6 +1721,7 @@ void blk_drain(BlockBackend *blk) + + if (bs) { + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + } + +@@ -2044,10 +2052,13 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + int ret; + + if (bs) { ++ bdrv_ref(bs); ++ + if (update_root_node) { + ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root, + errp); + if (ret < 0) { ++ bdrv_unref(bs); + return ret; + } + } +@@ -2057,6 +2068,8 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + throttle_group_attach_aio_context(tgm, new_context); + bdrv_drained_end(bs); + } ++ ++ bdrv_unref(bs); + } + + blk->ctx = new_context; +@@ -2326,11 +2339,13 @@ void blk_io_limits_disable(BlockBackend *blk) + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + assert(tgm->throttle_state); + if (bs) { ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + } + throttle_group_unregister_tgm(tgm); + if (bs) { + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + } + +-- +2.27.0 + diff --git a/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch b/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch new file mode 100644 index 0000000..c1ee128 --- /dev/null +++ b/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch @@ -0,0 +1,56 @@ +From 4c6eff78f4b31ec4bd7b42440396760d19fde63e Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 18 Jan 2022 17:59:59 +0100 +Subject: [PATCH 6/7] block/io: Update BSC only if want_zero is true + +RH-Author: Hanna Reitz +RH-MergeRequest: 112: block/io: Update BSC only if want_zero is true +RH-Commit: [1/2] 
a202de1f52110d1e871c3b5b58f2d9e9b5d17570 +RH-Bugzilla: 2041480 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +We update the block-status cache whenever we get new information from a +bdrv_co_block_status() call to the block driver. However, if we have +passed want_zero=false to that call, it may flag areas containing zeroes +as data, and so we would update the block-status cache with wrong +information. + +Therefore, we should not update the cache with want_zero=false. + +Reported-by: Nir Soffer +Fixes: 0bc329fbb00 ("block: block-status cache for data regions") +Reviewed-by: Nir Soffer +Cc: qemu-stable@nongnu.org +Signed-off-by: Hanna Reitz +Message-Id: <20220118170000.49423-2-hreitz@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Eric Blake +(cherry picked from commit 113b727ce788335cf76f65355d670c9bc130fd75) +Signed-off-by: Hanna Reitz +--- + block/io.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/block/io.c b/block/io.c +index bb0a254def..4e4cb556c5 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -2497,8 +2497,12 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, + * non-protocol nodes, and then it is never used. However, filling + * the cache requires an RCU update, so double check here to avoid + * such an update if possible. ++ * ++ * Check want_zero, because we only want to update the cache when we ++ * have accurate information about what is zero and what is data. 
+ */ +- if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && ++ if (want_zero && ++ ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && + QLIST_EMPTY(&bs->children)) + { + /* +-- +2.27.0 + diff --git a/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch b/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch new file mode 100644 index 0000000..324021b --- /dev/null +++ b/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch @@ -0,0 +1,52 @@ +From d5a85fcf996948d1154e88e9ee3b4e8c64ec2694 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:08 +0100 +Subject: [PATCH 2/6] block/nbd: Assert there are no timers when closed + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [2/6] 995795ae9844a7d2b28cb1e57fd7fe81482d0205 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Our two timers must not remain armed beyond nbd_clear_bdrvstate(), or +they will access freed data when they fire. + +This patch is separate from the patches that actually fix the issue +(HEAD^^ and HEAD^) so that you can run the associated regression iotest +(281) on a configuration that reproducibly exposes the bug. 
+ +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 8a39c381e5e407d2fe5500324323f90a8540fa90) + +Conflict: +- block/nbd.c: open_timer was introduced after the 6.2 release (for + nbd's @open-timeout parameter), and has not been backported, so drop + the assertion that it is NULL + +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index b8e5a9b4cc..aab20125d8 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -108,6 +108,9 @@ static void nbd_clear_bdrvstate(BlockDriverState *bs) + + yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); + ++ /* Must not leave timers behind that would access freed data */ ++ assert(!s->reconnect_delay_timer); ++ + object_unref(OBJECT(s->tlscreds)); + qapi_free_SocketAddress(s->saddr); + s->saddr = NULL; +-- +2.27.0 + diff --git a/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch b/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch new file mode 100644 index 0000000..7d1c000 --- /dev/null +++ b/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch @@ -0,0 +1,54 @@ +From 8e23c0f208c6bd5bb64c4f6e4863b93fa6f4e9de Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:06 +0100 +Subject: [PATCH 1/6] block/nbd: Delete reconnect delay timer when done + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [1/6] 70814602a8a43a7c14857d76266d82b1aa5174a9 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +We start the reconnect delay timer to cancel the reconnection attempt +after a while. Once nbd_co_do_establish_connection() has returned, this +attempt is over, and we no longer need the timer. 
+ +Delete it before returning from nbd_reconnect_attempt(), so that it does +not persist beyond the I/O request that was paused for reconnecting; we +do not want it to fire in a drained section, because all sort of things +can happen in such a section (e.g. the AioContext might be changed, and +we do not want the timer to fire in the wrong context; or the BDS might +even be deleted, and so the timer CB would access already-freed data). + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 3ce1fc16bad9c3f8b7b10b451a224d6d76e5c551) +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index 5ef462db1b..b8e5a9b4cc 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -353,6 +353,13 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) + } + + nbd_co_do_establish_connection(s->bs, NULL); ++ ++ /* ++ * The reconnect attempt is done (maybe successfully, maybe not), so ++ * we no longer need this timer. Delete it so it will not outlive ++ * this I/O request (so draining removes all timers). 
++ */ ++ reconnect_delay_timer_del(s); + } + + static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle) +-- +2.27.0 + diff --git a/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch b/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch new file mode 100644 index 0000000..4cd3cce --- /dev/null +++ b/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch @@ -0,0 +1,107 @@ +From c7f63e7bbc5119d92775e20d1ebbf8280c78b732 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:11 +0100 +Subject: [PATCH 5/6] block/nbd: Move s->ioc on AioContext change + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [5/6] 107757b9fbadfb832c75521317108525daa4174e +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +s->ioc must always be attached to the NBD node's AioContext. If that +context changes, s->ioc must be attached to the new context. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2033626 +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit e15f3a66c830e3fce99c9d56c493c2f7078a1225) + +Conflict: +- block/nbd.c: open_timer was added after the 6.2 release, so we need + not (and cannot) assert it is NULL here. 
+ +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 41 +++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 41 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index aab20125d8..a3896c7f5f 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -2003,6 +2003,38 @@ static void nbd_cancel_in_flight(BlockDriverState *bs) + nbd_co_establish_connection_cancel(s->conn); + } + ++static void nbd_attach_aio_context(BlockDriverState *bs, ++ AioContext *new_context) ++{ ++ BDRVNBDState *s = bs->opaque; ++ ++ /* ++ * The reconnect_delay_timer is scheduled in I/O paths when the ++ * connection is lost, to cancel the reconnection attempt after a ++ * given time. Once this attempt is done (successfully or not), ++ * nbd_reconnect_attempt() ensures the timer is deleted before the ++ * respective I/O request is resumed. ++ * Since the AioContext can only be changed when a node is drained, ++ * the reconnect_delay_timer cannot be active here. ++ */ ++ assert(!s->reconnect_delay_timer); ++ ++ if (s->ioc) { ++ qio_channel_attach_aio_context(s->ioc, new_context); ++ } ++} ++ ++static void nbd_detach_aio_context(BlockDriverState *bs) ++{ ++ BDRVNBDState *s = bs->opaque; ++ ++ assert(!s->reconnect_delay_timer); ++ ++ if (s->ioc) { ++ qio_channel_detach_aio_context(s->ioc); ++ } ++} ++ + static BlockDriver bdrv_nbd = { + .format_name = "nbd", + .protocol_name = "nbd", +@@ -2026,6 +2058,9 @@ static BlockDriver bdrv_nbd = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static BlockDriver bdrv_nbd_tcp = { +@@ -2051,6 +2086,9 @@ static BlockDriver bdrv_nbd_tcp = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = 
nbd_detach_aio_context, + }; + + static BlockDriver bdrv_nbd_unix = { +@@ -2076,6 +2114,9 @@ static BlockDriver bdrv_nbd_unix = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static void bdrv_nbd_init(void) +-- +2.27.0 + diff --git a/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch b/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch new file mode 100644 index 0000000..2d8f3b4 --- /dev/null +++ b/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch @@ -0,0 +1,59 @@ +From f4b7133d7aeb1d0b9115d01b5cff4df7f6b24e78 Mon Sep 17 00:00:00 2001 +From: Peter Lieven +Date: Thu, 13 Jan 2022 15:44:25 +0100 +Subject: [PATCH 5/6] block/rbd: fix handling of holes in .bdrv_co_block_status + +RH-Author: Stefano Garzarella +RH-MergeRequest: 110: block/rbd: fix handling of holes in .bdrv_co_block_status +RH-Commit: [1/2] 352656a5c77cc7855b476c3559a10c6aa64a4f58 +RH-Bugzilla: 2037135 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Reitz + +the assumption that we can't hit a hole if we do not diff against a snapshot was wrong. + +We can see a hole in an image if we diff against base if there exists an older snapshot +of the image and we have discarded blocks in the image where the snapshot has data. + +Fix this by simply handling a hole like an unallocated area. There are no callbacks +for unallocated areas so just bail out if we hit a hole. 
+ +Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b +Suggested-by: Ilya Dryomov +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Lieven +Message-Id: <20220113144426.4036493-2-pl@kamp.de> +Reviewed-by: Ilya Dryomov +Reviewed-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +(cherry picked from commit 9e302f64bb407a9bb097b626da97228c2654cfee) +Signed-off-by: Stefano Garzarella +--- + block/rbd.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/block/rbd.c b/block/rbd.c +index def96292e0..20bb896c4a 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -1279,11 +1279,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, + RBDDiffIterateReq *req = opaque; + + assert(req->offs + req->bytes <= offs); +- /* +- * we do not diff against a snapshot so we should never receive a callback +- * for a hole. +- */ +- assert(exists); ++ ++ /* treat a hole like an unallocated area and bail out */ ++ if (!exists) { ++ return 0; ++ } + + if (!req->exists && offs > req->offs) { + /* +-- +2.27.0 + diff --git a/kvm-block-rbd-workaround-for-ceph-issue-53784.patch b/kvm-block-rbd-workaround-for-ceph-issue-53784.patch new file mode 100644 index 0000000..7e052f2 --- /dev/null +++ b/kvm-block-rbd-workaround-for-ceph-issue-53784.patch @@ -0,0 +1,103 @@ +From 8c50eedf03d8e62acd387b9aa9369dadcea9324c Mon Sep 17 00:00:00 2001 +From: Peter Lieven +Date: Thu, 13 Jan 2022 15:44:26 +0100 +Subject: [PATCH 6/6] block/rbd: workaround for ceph issue #53784 + +RH-Author: Stefano Garzarella +RH-MergeRequest: 110: block/rbd: fix handling of holes in .bdrv_co_block_status +RH-Commit: [2/2] 1384557462e89bb539d0d25a1a471ad738fb9e89 +RH-Bugzilla: 2037135 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Reitz + +librbd had a bug until early 2022 that affected all versions of ceph that +supported fast-diff. This bug results in reporting of incorrect offsets +if the offset parameter to rbd_diff_iterate2 is not object aligned. 
+ +This patch works around this bug for pre Quincy versions of librbd. + +Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Lieven +Message-Id: <20220113144426.4036493-3-pl@kamp.de> +Reviewed-by: Ilya Dryomov +Reviewed-by: Stefano Garzarella +Tested-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +(cherry picked from commit fc176116cdea816ceb8dd969080b2b95f58edbc0) +Signed-off-by: Stefano Garzarella +--- + block/rbd.c | 42 ++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 40 insertions(+), 2 deletions(-) + +diff --git a/block/rbd.c b/block/rbd.c +index 20bb896c4a..8f183eba2a 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -1320,6 +1320,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + int status, r; + RBDDiffIterateReq req = { .offs = offset }; + uint64_t features, flags; ++ uint64_t head = 0; + + assert(offset + bytes <= s->image_size); + +@@ -1347,7 +1348,43 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + return status; + } + +- r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, ++#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0) ++ /* ++ * librbd had a bug until early 2022 that affected all versions of ceph that ++ * supported fast-diff. This bug results in reporting of incorrect offsets ++ * if the offset parameter to rbd_diff_iterate2 is not object aligned. ++ * Work around this bug by rounding down the offset to object boundaries. ++ * This is OK because we call rbd_diff_iterate2 with whole_object = true. ++ * However, this workaround only works for non cloned images with default ++ * striping. 
++ * ++ * See: https://tracker.ceph.com/issues/53784 ++ */ ++ ++ /* check if RBD image has non-default striping enabled */ ++ if (features & RBD_FEATURE_STRIPINGV2) { ++ return status; ++ } ++ ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ /* ++ * check if RBD image is a clone (= has a parent). ++ * ++ * rbd_get_parent_info is deprecated from Nautilus onwards, but the ++ * replacement rbd_get_parent is not present in Luminous and Mimic. ++ */ ++ if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) { ++ return status; ++ } ++#pragma GCC diagnostic pop ++ ++ head = req.offs & (s->object_size - 1); ++ req.offs -= head; ++ bytes += head; ++#endif ++ ++ r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true, + qemu_rbd_diff_iterate_cb, &req); + if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) { + return status; +@@ -1366,7 +1403,8 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID; + } + +- *pnum = req.bytes; ++ assert(req.bytes > head); ++ *pnum = req.bytes - head; + return status; + } + +-- +2.27.0 + diff --git a/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch b/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch new file mode 100644 index 0000000..a948e57 --- /dev/null +++ b/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch @@ -0,0 +1,41 @@ +From 3a0e9bb88e82cc76ca5efc0595ce94b5dc34749e Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Mon, 25 Apr 2022 13:42:46 +0800 +Subject: [PATCH 1/2] configs/devices/aarch64-softmmu: Enable CONFIG_VIRTIO_MEM + +RH-Author: Gavin Shan +RH-MergeRequest: 80: Enable virtio-mem for aarch64 +RH-Commit: [1/1] 1afbd08da6d7c860da8d617a0a932d3660514878 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2044162 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: David Hildenbrand + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2044162 + +This 
enables virtio-mem device on aarch64 since all needed commits +are ready. + + b1b87327a9 hw/arm/virt: Support for virtio-mem-pci + 1263615efe virtio-mem: Correct default THP size for ARM64 + +Signed-off-by: Gavin Shan +--- + configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +index 5f6ee1de5b..187938573f 100644 +--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -22,6 +22,7 @@ CONFIG_VFIO=y + CONFIG_VFIO_PCI=y + CONFIG_VIRTIO_MMIO=y + CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_MEM=y + CONFIG_XIO3130=y + CONFIG_NVDIMM=y + CONFIG_ACPI_APEI=y +-- +2.35.1 + diff --git a/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch b/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch new file mode 100644 index 0000000..c1f3683 --- /dev/null +++ b/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch @@ -0,0 +1,101 @@ +From e3cb8849862a9f0dd20f2913d540336a037d43c7 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 10 May 2022 17:10:19 +0200 +Subject: [PATCH 07/16] coroutine: Rename qemu_coroutine_inc/dec_pool_size() + +RH-Author: Kevin Wolf +RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size +RH-Commit: [1/2] 6389b11f70225f221784c270d9b90c1ea43ca8fb (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 2079938 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella + +It's true that these functions currently affect the batch size in which +coroutines are reused (i.e. moved from the global release pool to the +allocation pool of a specific thread), but this is a bug and will be +fixed in a separate patch. + +In fact, the comment in the header file already just promises that it +influences the pool size, so reflect this in the name of the functions. 
+As a nice side effect, the shorter function name makes some line +wrapping unnecessary. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20220510151020.105528-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 98e3ab35054b946f7c2aba5408822532b0920b53) +Signed-off-by: Kevin Wolf +--- + hw/block/virtio-blk.c | 6 ++---- + include/qemu/coroutine.h | 6 +++--- + util/qemu-coroutine.c | 4 ++-- + 3 files changed, 7 insertions(+), 9 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 540c38f829..6a1cc41877 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1215,8 +1215,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + for (i = 0; i < conf->num_queues; i++) { + virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); + } +- qemu_coroutine_increase_pool_batch_size(conf->num_queues * conf->queue_size +- / 2); ++ qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); + virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); + if (err != NULL) { + error_propagate(errp, err); +@@ -1253,8 +1252,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) + for (i = 0; i < conf->num_queues; i++) { + virtio_del_queue(vdev, i); + } +- qemu_coroutine_decrease_pool_batch_size(conf->num_queues * conf->queue_size +- / 2); ++ qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); + qemu_del_vm_change_state_handler(s->change); + blockdev_mark_auto_del(s->blk); + virtio_cleanup(vdev); +diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h +index c828a95ee0..5b621d1295 100644 +--- a/include/qemu/coroutine.h ++++ b/include/qemu/coroutine.h +@@ -334,12 +334,12 @@ void coroutine_fn yield_until_fd_readable(int fd); + /** + * Increase coroutine pool size + */ +-void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size); ++void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size); + 
+ /** +- * Devcrease coroutine pool size ++ * Decrease coroutine pool size + */ +-void qemu_coroutine_decrease_pool_batch_size(unsigned int additional_pool_size); ++void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size); + + #include "qemu/lockable.h" + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index c03b2422ff..faca0ca97c 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -205,12 +205,12 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) + return co->ctx; + } + +-void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size) ++void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) + { + qatomic_add(&pool_batch_size, additional_pool_size); + } + +-void qemu_coroutine_decrease_pool_batch_size(unsigned int removing_pool_size) ++void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) + { + qatomic_sub(&pool_batch_size, removing_pool_size); + } +-- +2.31.1 + diff --git a/kvm-coroutine-Revert-to-constant-batch-size.patch b/kvm-coroutine-Revert-to-constant-batch-size.patch new file mode 100644 index 0000000..2973510 --- /dev/null +++ b/kvm-coroutine-Revert-to-constant-batch-size.patch @@ -0,0 +1,138 @@ +From 345107bfd5537b51f34aaeb97d6161858bb6feee Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 10 May 2022 17:10:20 +0200 +Subject: [PATCH 08/16] coroutine: Revert to constant batch size + +RH-Author: Kevin Wolf +RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size +RH-Commit: [2/2] 8a8a39af873854cdc8333d1a70f3479a97c3ec7a (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 2079938 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella + +Commit 4c41c69e changed the way the coroutine pool is sized because for +virtio-blk devices with a large queue size and heavy I/O, it was just +too small and caused coroutines to be deleted and reallocated soon +afterwards. 
The change made the size dynamic based on the number of +queues and the queue size of virtio-blk devices. + +There are two important numbers here: Slightly simplified, when a +coroutine terminates, it is generally stored in the global release pool +up to a certain pool size, and if the pool is full, it is freed. +Conversely, when allocating a new coroutine, the coroutines in the +release pool are reused if the pool already has reached a certain +minimum size (the batch size), otherwise we allocate new coroutines. + +The problem after commit 4c41c69e is that it not only increases the +maximum pool size (which is the intended effect), but also the batch +size for reusing coroutines (which is a bug). It means that in cases +with many devices and/or a large queue size (which defaults to the +number of vcpus for virtio-blk-pci), many thousand coroutines could be +sitting in the release pool without being reused. + +This is not only a waste of memory and allocations, but it actually +makes the QEMU process likely to hit the vm.max_map_count limit on Linux +because each coroutine requires two mappings (its stack and the guard +page for the stack), causing it to abort() in qemu_alloc_stack() because +when the limit is hit, mprotect() starts to fail with ENOMEM. + +In order to fix the problem, change the batch size back to 64 to avoid +uselessly accumulating coroutines in the release pool, but keep the +dynamic maximum pool size so that coroutines aren't freed too early +in heavy I/O scenarios. + +Note that this fix doesn't strictly make it impossible to hit the limit, +but this would only happen if most of the coroutines are actually in use +at the same time, not just sitting in a pool. This is the same behaviour +as we already had before commit 4c41c69e. Fully preventing this would +require allowing qemu_coroutine_create() to return an error, but it +doesn't seem to be a scenario that people hit in practice. 
+ +Cc: qemu-stable@nongnu.org +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938 +Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b +Signed-off-by: Kevin Wolf +Message-Id: <20220510151020.105528-3-kwolf@redhat.com> +Tested-by: Hiroki Narukawa +Signed-off-by: Kevin Wolf +(cherry picked from commit 9ec7a59b5aad4b736871c378d30f5ef5ec51cb52) + +Conflicts: + util/qemu-coroutine.c + +Trivial merge conflict because we don't have commit ac387a08 downstream. + +Signed-off-by: Kevin Wolf +--- + util/qemu-coroutine.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index faca0ca97c..804f672e0a 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -20,14 +20,20 @@ + #include "qemu/coroutine_int.h" + #include "block/aio.h" + +-/** Initial batch size is 64, and is increased on demand */ ++/** ++ * The minimal batch size is always 64, coroutines from the release_pool are ++ * reused as soon as there are 64 coroutines in it. The maximum pool size starts ++ * with 64 and is increased on demand so that coroutines are not deleted even if ++ * they are not immediately reused. 
++ */ + enum { +- POOL_INITIAL_BATCH_SIZE = 64, ++ POOL_MIN_BATCH_SIZE = 64, ++ POOL_INITIAL_MAX_SIZE = 64, + }; + + /** Free list to speed up creation */ + static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); +-static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE; ++static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; + static unsigned int release_pool_size; + static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); + static __thread unsigned int alloc_pool_size; +@@ -51,7 +57,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) + if (CONFIG_COROUTINE_POOL) { + co = QSLIST_FIRST(&alloc_pool); + if (!co) { +- if (release_pool_size > qatomic_read(&pool_batch_size)) { ++ if (release_pool_size > POOL_MIN_BATCH_SIZE) { + /* Slow path; a good place to register the destructor, too. */ + if (!coroutine_pool_cleanup_notifier.notify) { + coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; +@@ -88,12 +94,12 @@ static void coroutine_delete(Coroutine *co) + co->caller = NULL; + + if (CONFIG_COROUTINE_POOL) { +- if (release_pool_size < qatomic_read(&pool_batch_size) * 2) { ++ if (release_pool_size < qatomic_read(&pool_max_size) * 2) { + QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); + qatomic_inc(&release_pool_size); + return; + } +- if (alloc_pool_size < qatomic_read(&pool_batch_size)) { ++ if (alloc_pool_size < qatomic_read(&pool_max_size)) { + QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); + alloc_pool_size++; + return; +@@ -207,10 +213,10 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) + + void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) + { +- qatomic_add(&pool_batch_size, additional_pool_size); ++ qatomic_add(&pool_max_size, additional_pool_size); + } + + void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) + { +- qatomic_sub(&pool_batch_size, removing_pool_size); ++ qatomic_sub(&pool_max_size, 
removing_pool_size); + } +-- +2.31.1 + diff --git a/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch new file mode 100644 index 0000000..963cf04 --- /dev/null +++ b/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch @@ -0,0 +1,132 @@ +From ffbd90e5f4eba620c7cd631b04f0ed31beb22ffa Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 12:07:56 +0100 +Subject: [PATCH 1/6] coroutine-ucontext: use QEMU_DEFINE_STATIC_CO_TLS() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables +RH-Commit: [1/3] a9782fe8e919c4bd317b7e8744c7ff57d898add3 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 1952483 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf + +Thread-Local Storage variables cannot be used directly from coroutine +code because the compiler may optimize TLS variable accesses across +qemu_coroutine_yield() calls. When the coroutine is re-entered from +another thread the TLS variables from the old thread must no longer be +used. + +Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. 
+ +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220307153853.602859-2-stefanha@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit 34145a307d849d0b6734d0222a7aa0bb9eef7407) +Signed-off-by: Stefan Hajnoczi +--- + util/coroutine-ucontext.c | 38 ++++++++++++++++++++++++-------------- + 1 file changed, 24 insertions(+), 14 deletions(-) + +diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c +index 904b375192..127d5a13c8 100644 +--- a/util/coroutine-ucontext.c ++++ b/util/coroutine-ucontext.c +@@ -25,6 +25,7 @@ + #include "qemu/osdep.h" + #include + #include "qemu/coroutine_int.h" ++#include "qemu/coroutine-tls.h" + + #ifdef CONFIG_VALGRIND_H + #include +@@ -66,8 +67,8 @@ typedef struct { + /** + * Per-thread coroutine bookkeeping + */ +-static __thread CoroutineUContext leader; +-static __thread Coroutine *current; ++QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); ++QEMU_DEFINE_STATIC_CO_TLS(CoroutineUContext, leader); + + /* + * va_args to makecontext() must be type 'int', so passing +@@ -97,14 +98,15 @@ static inline __attribute__((always_inline)) + void finish_switch_fiber(void *fake_stack_save) + { + #ifdef CONFIG_ASAN ++ CoroutineUContext *leaderp = get_ptr_leader(); + const void *bottom_old; + size_t size_old; + + __sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old); + +- if (!leader.stack) { +- leader.stack = (void *)bottom_old; +- leader.stack_size = size_old; ++ if (!leaderp->stack) { ++ leaderp->stack = (void *)bottom_old; ++ leaderp->stack_size = size_old; + } + #endif + #ifdef CONFIG_TSAN +@@ -161,8 +163,10 @@ static void coroutine_trampoline(int i0, int i1) + + /* Initialize longjmp environment and switch back the caller */ + if (!sigsetjmp(self->env, 0)) { +- start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, leader.stack, +- leader.stack_size); ++ CoroutineUContext *leaderp = get_ptr_leader(); ++ ++ start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, 
++ leaderp->stack, leaderp->stack_size); + start_switch_fiber_tsan(&fake_stack_save, self, true); /* true=caller */ + siglongjmp(*(sigjmp_buf *)co->entry_arg, 1); + } +@@ -297,7 +301,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + int ret; + void *fake_stack_save = NULL; + +- current = to_; ++ set_current(to_); + + ret = sigsetjmp(from->env, 0); + if (ret == 0) { +@@ -315,18 +319,24 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + + Coroutine *qemu_coroutine_self(void) + { +- if (!current) { +- current = &leader.base; ++ Coroutine *self = get_current(); ++ CoroutineUContext *leaderp = get_ptr_leader(); ++ ++ if (!self) { ++ self = &leaderp->base; ++ set_current(self); + } + #ifdef CONFIG_TSAN +- if (!leader.tsan_co_fiber) { +- leader.tsan_co_fiber = __tsan_get_current_fiber(); ++ if (!leaderp->tsan_co_fiber) { ++ leaderp->tsan_co_fiber = __tsan_get_current_fiber(); + } + #endif +- return current; ++ return self; + } + + bool qemu_in_coroutine(void) + { +- return current && current->caller; ++ Coroutine *self = get_current(); ++ ++ return self && self->caller; + } +-- +2.31.1 + diff --git a/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch new file mode 100644 index 0000000..9d0f811 --- /dev/null +++ b/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch @@ -0,0 +1,139 @@ +From 9c2e55d25fec6ffb21e344513b7dbeed7e21f641 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 12:08:04 +0100 +Subject: [PATCH 2/6] coroutine: use QEMU_DEFINE_STATIC_CO_TLS() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables +RH-Commit: [2/3] 68a8847e406e2eace6ddc31b0c5676a60600d606 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 1952483 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf + +Thread-Local Storage 
variables cannot be used directly from coroutine +code because the compiler may optimize TLS variable accesses across +qemu_coroutine_yield() calls. When the coroutine is re-entered from +another thread the TLS variables from the old thread must no longer be +used. + +Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. +The alloc_pool QSLIST needs a typedef so the return value of +get_ptr_alloc_pool() can be stored in a local variable. + +One example of why this code is necessary: a coroutine that yields +before calling qemu_coroutine_create() to create another coroutine is +affected by the TLS issue. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220307153853.602859-3-stefanha@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit ac387a08a9c9f6b36757da912f0339c25f421f90) + +Conflicts: +- Context conflicts due to commit 5411171c3ef4 ("coroutine: Revert to + constant batch size"). + +Signed-off-by: Stefan Hajnoczi +--- + util/qemu-coroutine.c | 41 ++++++++++++++++++++++++----------------- + 1 file changed, 24 insertions(+), 17 deletions(-) + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index 804f672e0a..4a8bd63ef0 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -18,6 +18,7 @@ + #include "qemu/atomic.h" + #include "qemu/coroutine.h" + #include "qemu/coroutine_int.h" ++#include "qemu/coroutine-tls.h" + #include "block/aio.h" + + /** +@@ -35,17 +36,20 @@ enum { + static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); + static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; + static unsigned int release_pool_size; +-static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); +-static __thread unsigned int alloc_pool_size; +-static __thread Notifier coroutine_pool_cleanup_notifier; ++ ++typedef QSLIST_HEAD(, Coroutine) CoroutineQSList; ++QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool); 
++QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size); ++QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier); + + static void coroutine_pool_cleanup(Notifier *n, void *value) + { + Coroutine *co; + Coroutine *tmp; ++ CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); + +- QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) { +- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); ++ QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) { ++ QSLIST_REMOVE_HEAD(alloc_pool, pool_next); + qemu_coroutine_delete(co); + } + } +@@ -55,27 +59,30 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) + Coroutine *co = NULL; + + if (CONFIG_COROUTINE_POOL) { +- co = QSLIST_FIRST(&alloc_pool); ++ CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); ++ ++ co = QSLIST_FIRST(alloc_pool); + if (!co) { + if (release_pool_size > POOL_MIN_BATCH_SIZE) { + /* Slow path; a good place to register the destructor, too. */ +- if (!coroutine_pool_cleanup_notifier.notify) { +- coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; +- qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier); ++ Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier(); ++ if (!notifier->notify) { ++ notifier->notify = coroutine_pool_cleanup; ++ qemu_thread_atexit_add(notifier); + } + + /* This is not exact; there could be a little skew between + * release_pool_size and the actual size of release_pool. But + * it is just a heuristic, it does not need to be perfect. 
+ */ +- alloc_pool_size = qatomic_xchg(&release_pool_size, 0); +- QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool); +- co = QSLIST_FIRST(&alloc_pool); ++ set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0)); ++ QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool); ++ co = QSLIST_FIRST(alloc_pool); + } + } + if (co) { +- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); +- alloc_pool_size--; ++ QSLIST_REMOVE_HEAD(alloc_pool, pool_next); ++ set_alloc_pool_size(get_alloc_pool_size() - 1); + } + } + +@@ -99,9 +106,9 @@ static void coroutine_delete(Coroutine *co) + qatomic_inc(&release_pool_size); + return; + } +- if (alloc_pool_size < qatomic_read(&pool_max_size)) { +- QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); +- alloc_pool_size++; ++ if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) { ++ QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next); ++ set_alloc_pool_size(get_alloc_pool_size() + 1); + return; + } + } +-- +2.31.1 + diff --git a/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch new file mode 100644 index 0000000..1665319 --- /dev/null +++ b/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch @@ -0,0 +1,99 @@ +From 336581e6e9ace3f1ddd24ad0a258db9785f9b0ed Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 12:08:12 +0100 +Subject: [PATCH 3/6] coroutine-win32: use QEMU_DEFINE_STATIC_CO_TLS() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables +RH-Commit: [3/3] 55b35dfdae1bc7d6f614ac9f81a92f5c6431f713 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 1952483 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf + +Thread-Local Storage variables cannot be used directly from coroutine +code because the compiler may optimize TLS variable accesses across +qemu_coroutine_yield() calls. 
When the coroutine is re-entered from +another thread the TLS variables from the old thread must no longer be +used. + +Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. + +I think coroutine-win32.c could get away with __thread because the +variables are only used in situations where either the stale value is +correct (current) or outside coroutine context (loading leader when +current is NULL). Due to the difficulty of being sure that this is +really safe in all scenarios it seems worth converting it anyway. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220307153853.602859-4-stefanha@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit c1fe694357a328c807ae3cc6961c19e923448fcc) +Signed-off-by: Stefan Hajnoczi +--- + util/coroutine-win32.c | 18 +++++++++++++----- + 1 file changed, 13 insertions(+), 5 deletions(-) + +diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c +index de6bd4fd3e..c02a62c896 100644 +--- a/util/coroutine-win32.c ++++ b/util/coroutine-win32.c +@@ -25,6 +25,7 @@ + #include "qemu/osdep.h" + #include "qemu-common.h" + #include "qemu/coroutine_int.h" ++#include "qemu/coroutine-tls.h" + + typedef struct + { +@@ -34,8 +35,8 @@ typedef struct + CoroutineAction action; + } CoroutineWin32; + +-static __thread CoroutineWin32 leader; +-static __thread Coroutine *current; ++QEMU_DEFINE_STATIC_CO_TLS(CoroutineWin32, leader); ++QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); + + /* This function is marked noinline to prevent GCC from inlining it + * into coroutine_trampoline(). 
If we allow it to do that then it +@@ -52,7 +53,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_); + CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_); + +- current = to_; ++ set_current(to_); + + to->action = action; + SwitchToFiber(to->fiber); +@@ -89,14 +90,21 @@ void qemu_coroutine_delete(Coroutine *co_) + + Coroutine *qemu_coroutine_self(void) + { ++ Coroutine *current = get_current(); ++ + if (!current) { +- current = &leader.base; +- leader.fiber = ConvertThreadToFiber(NULL); ++ CoroutineWin32 *leader = get_ptr_leader(); ++ ++ current = &leader->base; ++ set_current(current); ++ leader->fiber = ConvertThreadToFiber(NULL); + } + return current; + } + + bool qemu_in_coroutine(void) + { ++ Coroutine *current = get_current(); ++ + return current && current->caller; + } +-- +2.31.1 + diff --git a/kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch b/kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch new file mode 100644 index 0000000..040cfe1 --- /dev/null +++ b/kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch @@ -0,0 +1,58 @@ +From abd84f26e0fe0bc9952d91fbd35fb3a7253cfecf Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 20:54:45 -0400 +Subject: [PATCH 1/2] display/qxl-render: fix race condition in qxl_cursor + (CVE-2021-4207) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 152: display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207) +RH-Commit: [1/1] f05b9a956f2e0ca522b5be127beff813d04b5588 (jmaloy/qemu-kvm) +RH-Bugzilla: 2040738 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Mauro Matteo Cascella + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2040738 +Upstream: Merged +CVE: CVE-2021-4207 + +commit 9569f5cb5b4bffa9d3ebc8ba7da1e03830a9a895 +Author: Mauro Matteo Cascella +Date: Thu Apr 7 10:11:06 2022 +0200 + + 
display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207) + + Avoid fetching 'width' and 'height' a second time to prevent possible + race condition. Refer to security advisory + https://starlabs.sg/advisories/22-4207/ for more information. + + Fixes: CVE-2021-4207 + Signed-off-by: Mauro Matteo Cascella + Reviewed-by: Marc-André Lureau + Message-Id: <20220407081106.343235-1-mcascell@redhat.com> + Signed-off-by: Gerd Hoffmann + +(cherry picked from commit 9569f5cb5b4bffa9d3ebc8ba7da1e03830a9a895) +Signed-off-by: Jon Maloy +--- + hw/display/qxl-render.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c +index d28849b121..237ed293ba 100644 +--- a/hw/display/qxl-render.c ++++ b/hw/display/qxl-render.c +@@ -266,7 +266,7 @@ static QEMUCursor *qxl_cursor(PCIQXLDevice *qxl, QXLCursor *cursor, + } + break; + case SPICE_CURSOR_TYPE_ALPHA: +- size = sizeof(uint32_t) * cursor->header.width * cursor->header.height; ++ size = sizeof(uint32_t) * c->width * c->height; + qxl_unpack_chunks(c->data, size, qxl, &cursor->chunk, group_id); + if (qxl->debug > 2) { + cursor_print_ascii_art(c, "qxl/alpha"); +-- +2.27.0 + diff --git a/kvm-doc-Add-the-SGX-numa-description.patch b/kvm-doc-Add-the-SGX-numa-description.patch new file mode 100644 index 0000000..0bed8a6 --- /dev/null +++ b/kvm-doc-Add-the-SGX-numa-description.patch @@ -0,0 +1,77 @@ +From e8377e3f4d540e2594a50985523e87d1f3cabbc7 Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:08 -0400 +Subject: [PATCH 3/7] doc: Add the SGX numa description + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [3/5] 41c74688c9662b966c243566a837135ff52341c4 +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +Add the SGX numa reference command and how to check if +SGX numa is support or not with multiple EPC sections. 
+ +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-5-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit d1889b36098c79e2e6ac90faf3d0dc5ec0057677) +Signed-off-by: Paul Lai +--- + docs/system/i386/sgx.rst | 31 +++++++++++++++++++++++++++---- + 1 file changed, 27 insertions(+), 4 deletions(-) + +diff --git a/docs/system/i386/sgx.rst b/docs/system/i386/sgx.rst +index f8fade5ac2..0f0a73f758 100644 +--- a/docs/system/i386/sgx.rst ++++ b/docs/system/i386/sgx.rst +@@ -141,8 +141,7 @@ To launch a SGX guest: + |qemu_system_x86| \\ + -cpu host,+sgx-provisionkey \\ + -object memory-backend-epc,id=mem1,size=64M,prealloc=on \\ +- -object memory-backend-epc,id=mem2,size=28M \\ +- -M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2 ++ -M sgx-epc.0.memdev=mem1,sgx-epc.0.node=0 + + Utilizing SGX in the guest requires a kernel/OS with SGX support. + The support can be determined in guest by:: +@@ -152,8 +151,32 @@ The support can be determined in guest by:: + and SGX epc info by:: + + $ dmesg | grep sgx +- [ 1.242142] sgx: EPC section 0x180000000-0x181bfffff +- [ 1.242319] sgx: EPC section 0x181c00000-0x1837fffff ++ [ 0.182807] sgx: EPC section 0x140000000-0x143ffffff ++ [ 0.183695] sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0. ++ ++To launch a SGX numa guest: ++ ++.. 
parsed-literal:: ++ ++ |qemu_system_x86| \\ ++ -cpu host,+sgx-provisionkey \\ ++ -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \\ ++ -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \\ ++ -numa node,nodeid=0,cpus=0-1,memdev=node0 \\ ++ -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \\ ++ -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \\ ++ -numa node,nodeid=1,cpus=2-3,memdev=node1 \\ ++ -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 ++ ++and SGX epc numa info by:: ++ ++ $ dmesg | grep sgx ++ [ 0.369937] sgx: EPC section 0x180000000-0x183ffffff ++ [ 0.370259] sgx: EPC section 0x184000000-0x185bfffff ++ ++ $ dmesg | grep SRAT ++ [ 0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x180000000-0x183ffffff] ++ [ 0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x184000000-0x185bfffff] + + References + ---------- +-- +2.27.0 + diff --git a/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch b/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch new file mode 100644 index 0000000..2795dcd --- /dev/null +++ b/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch @@ -0,0 +1,179 @@ +From 8a12049e97149056f61f7748d9869606d282d16e Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 06/16] hw/acpi/aml-build: Use existing CPU topology to build + PPTT table + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [6/6] 53fa376531c204cf706cc1a7a0499019756106cb (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +When the PPTT table is built, the CPU topology is re-calculated, but +it's unecessary because the CPU topology has been populated in +virt_possible_cpu_arch_ids() on arm/virt machine. 
+ +This reworks build_pptt() to avoid by reusing the existing IDs in +ms->possible_cpus. Currently, the only user of build_pptt() is +arm/virt machine. + +Signed-off-by: Gavin Shan +Tested-by: Yanan Wang +Reviewed-by: Yanan Wang +Acked-by: Igor Mammedov +Acked-by: Michael S. Tsirkin +Message-id: 20220503140304.855514-7-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit ae9141d4a3265553503bf07d3574b40f84615a34) +Signed-off-by: Gavin Shan +--- + hw/acpi/aml-build.c | 111 +++++++++++++++++++------------------------- + 1 file changed, 48 insertions(+), 63 deletions(-) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index 4086879ebf..e6bfac95c7 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -2002,86 +2002,71 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); +- GQueue *list = g_queue_new(); +- guint pptt_start = table_data->len; +- guint parent_offset; +- guint length, i; +- int uid = 0; +- int socket; ++ CPUArchIdList *cpus = ms->possible_cpus; ++ int64_t socket_id = -1, cluster_id = -1, core_id = -1; ++ uint32_t socket_offset = 0, cluster_offset = 0, core_offset = 0; ++ uint32_t pptt_start = table_data->len; ++ int n; + AcpiTable table = { .sig = "PPTT", .rev = 2, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + + acpi_table_begin(&table, table_data); + +- for (socket = 0; socket < ms->smp.sockets; socket++) { +- g_queue_push_tail(list, +- GUINT_TO_POINTER(table_data->len - pptt_start)); +- build_processor_hierarchy_node( +- table_data, +- /* +- * Physical package - represents the boundary +- * of a physical package +- */ +- (1 << 0), +- 0, socket, NULL, 0); +- } +- +- if (mc->smp_props.clusters_supported) { +- length = g_queue_get_length(list); +- for (i = 0; i < length; i++) { +- int cluster; +- +- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); +- for (cluster = 0; cluster < 
ms->smp.clusters; cluster++) { +- g_queue_push_tail(list, +- GUINT_TO_POINTER(table_data->len - pptt_start)); +- build_processor_hierarchy_node( +- table_data, +- (0 << 0), /* not a physical package */ +- parent_offset, cluster, NULL, 0); +- } ++ /* ++ * This works with the assumption that cpus[n].props.*_id has been ++ * sorted from top to down levels in mc->possible_cpu_arch_ids(). ++ * Otherwise, the unexpected and duplicated containers will be ++ * created. ++ */ ++ for (n = 0; n < cpus->len; n++) { ++ if (cpus->cpus[n].props.socket_id != socket_id) { ++ assert(cpus->cpus[n].props.socket_id > socket_id); ++ socket_id = cpus->cpus[n].props.socket_id; ++ cluster_id = -1; ++ core_id = -1; ++ socket_offset = table_data->len - pptt_start; ++ build_processor_hierarchy_node(table_data, ++ (1 << 0), /* Physical package */ ++ 0, socket_id, NULL, 0); + } +- } + +- length = g_queue_get_length(list); +- for (i = 0; i < length; i++) { +- int core; +- +- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); +- for (core = 0; core < ms->smp.cores; core++) { +- if (ms->smp.threads > 1) { +- g_queue_push_tail(list, +- GUINT_TO_POINTER(table_data->len - pptt_start)); +- build_processor_hierarchy_node( +- table_data, +- (0 << 0), /* not a physical package */ +- parent_offset, core, NULL, 0); +- } else { +- build_processor_hierarchy_node( +- table_data, +- (1 << 1) | /* ACPI Processor ID valid */ +- (1 << 3), /* Node is a Leaf */ +- parent_offset, uid++, NULL, 0); ++ if (mc->smp_props.clusters_supported) { ++ if (cpus->cpus[n].props.cluster_id != cluster_id) { ++ assert(cpus->cpus[n].props.cluster_id > cluster_id); ++ cluster_id = cpus->cpus[n].props.cluster_id; ++ core_id = -1; ++ cluster_offset = table_data->len - pptt_start; ++ build_processor_hierarchy_node(table_data, ++ (0 << 0), /* Not a physical package */ ++ socket_offset, cluster_id, NULL, 0); + } ++ } else { ++ cluster_offset = socket_offset; + } +- } + +- length = g_queue_get_length(list); +- for (i = 0; i < 
length; i++) { +- int thread; ++ if (ms->smp.threads == 1) { ++ build_processor_hierarchy_node(table_data, ++ (1 << 1) | /* ACPI Processor ID valid */ ++ (1 << 3), /* Node is a Leaf */ ++ cluster_offset, n, NULL, 0); ++ } else { ++ if (cpus->cpus[n].props.core_id != core_id) { ++ assert(cpus->cpus[n].props.core_id > core_id); ++ core_id = cpus->cpus[n].props.core_id; ++ core_offset = table_data->len - pptt_start; ++ build_processor_hierarchy_node(table_data, ++ (0 << 0), /* Not a physical package */ ++ cluster_offset, core_id, NULL, 0); ++ } + +- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); +- for (thread = 0; thread < ms->smp.threads; thread++) { +- build_processor_hierarchy_node( +- table_data, ++ build_processor_hierarchy_node(table_data, + (1 << 1) | /* ACPI Processor ID valid */ + (1 << 2) | /* Processor is a Thread */ + (1 << 3), /* Node is a Leaf */ +- parent_offset, uid++, NULL, 0); ++ core_offset, n, NULL, 0); + } + } + +- g_queue_free(list); + acpi_table_end(linker, &table); + } + +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Add-8.6-machine-type.patch b/kvm-hw-arm-virt-Add-8.6-machine-type.patch new file mode 100644 index 0000000..f3c5492 --- /dev/null +++ b/kvm-hw-arm-virt-Add-8.6-machine-type.patch @@ -0,0 +1,57 @@ +From a154eb35d738aecf552d57d99499facce1c834ba Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 15:24:24 +0100 +Subject: [PATCH 4/6] hw/arm/virt: Add 8.6 machine type + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [4/5] d0df3e796d3e9a6ca2af1e3b33fc6021bcac5d09 +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Add 8.6 machine type. 
+ +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 6a4173b6c3..c9c17b9d45 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3228,17 +3228,23 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + ++static void rhel860_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 6, 0) ++ + static void rhel850_virt_options(MachineClass *mc) + { + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + +- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ rhel860_virt_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); + mc->smp_props.prefer_sockets = true; + vmc->no_cpu_topology = true; + vmc->no_tcg_its = true; + } +-DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) ++DEFINE_RHEL_MACHINE(8, 5, 0) + + static void rhel840_virt_options(MachineClass *mc) + { +-- +2.27.0 + diff --git a/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch b/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch new file mode 100644 index 0000000..679f436 --- /dev/null +++ b/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch @@ -0,0 +1,86 @@ +From 1b4a8daf695a81f18ba70bea91b199da215da4e1 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 5 Jan 2022 16:17:10 +0100 +Subject: [PATCH 5/6] hw/arm/virt: Check no_tcg_its and minor style changes + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [5/5] 57e77446ff5a1a7efe152b2c907c0a0ca5487ab7 +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Truly allow TCG ITS instantiation according to the no_tcg_its +class flag. Otherwise it is always set to false. 
+ +We also take benefit of this patch to do some minor non +functional style changes to be closer to the upstream code. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c9c17b9d45..dbf0a6d62f 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3157,6 +3157,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "in ACPI table header." + "The string may be up to 6 bytes in size"); + ++ + object_class_property_add_str(oc, "x-oem-table-id", + virt_get_oem_table_id, + virt_set_oem_table_id); +@@ -3164,6 +3165,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Override the default value of field OEM Table ID " + "in ACPI table header." + "The string may be up to 8 bytes in size"); ++ + } + + static void rhel_virt_instance_init(Object *obj) +@@ -3188,24 +3190,32 @@ static void rhel_virt_instance_init(Object *obj) + } else { + /* Default allows ITS instantiation */ + vms->its = true; ++ ++ if (vmc->no_tcg_its) { ++ vms->tcg_its = false; ++ } else { ++ vms->tcg_its = true; ++ } + } + + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; + ++ /* The default root bus is attached to iommu by default */ ++ vms->default_bus_bypass_iommu = false; ++ + /* Default disallows RAS instantiation and is non-configurable for RHEL */ + vms->ras = false; + + /* MTE is disabled by default and non-configurable for RHEL */ + vms->mte = false; + +- vms->default_bus_bypass_iommu = false; + vms->irqmap = a15irqmap; + + virt_flash_create(vms); ++ + vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); +- + } + + static const TypeInfo rhel_machine_info = { +-- +2.27.0 + diff --git a/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch b/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch new file mode 100644 index 0000000..240aead --- 
/dev/null +++ b/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch @@ -0,0 +1,74 @@ +From 3b05d3464945295112b5d02d142422f524a52054 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 03/16] hw/arm/virt: Consider SMP configuration in CPU topology + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [3/6] 7125b41f038c2b1cb33377d0ef1222f1ea42b648 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +Currently, the SMP configuration isn't considered when the CPU +topology is populated. In this case, it's impossible to provide +the default CPU-to-NUMA mapping or association based on the socket +ID of the given CPU. + +This takes account of SMP configuration when the CPU topology +is populated. The die ID for the given CPU isn't assigned since +it's not supported on arm/virt machine. 
Besides, the used SMP +configuration in qtest/numa-test/aarch64_numa_cpu() is corrected +to avoid testing failure. + +Signed-off-by: Gavin Shan +Reviewed-by: Yanan Wang +Acked-by: Igor Mammedov +Message-id: 20220503140304.855514-4-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit c9ec4cb5e4936f980889e717524e73896b0200ed) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 8be12e121d..a87c8d396a 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2553,6 +2553,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + int n; + unsigned int max_cpus = ms->smp.max_cpus; + VirtMachineState *vms = VIRT_MACHINE(ms); ++ MachineClass *mc = MACHINE_GET_CLASS(vms); + + if (ms->possible_cpus) { + assert(ms->possible_cpus->len == max_cpus); +@@ -2566,8 +2567,20 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + ms->possible_cpus->cpus[n].type = ms->cpu_type; + ms->possible_cpus->cpus[n].arch_id = + virt_cpu_mp_affinity(vms, n); ++ ++ assert(!mc->smp_props.dies_supported); ++ ms->possible_cpus->cpus[n].props.has_socket_id = true; ++ ms->possible_cpus->cpus[n].props.socket_id = ++ n / (ms->smp.clusters * ms->smp.cores * ms->smp.threads); ++ ms->possible_cpus->cpus[n].props.has_cluster_id = true; ++ ms->possible_cpus->cpus[n].props.cluster_id = ++ (n / (ms->smp.cores * ms->smp.threads)) % ms->smp.clusters; ++ ms->possible_cpus->cpus[n].props.has_core_id = true; ++ ms->possible_cpus->cpus[n].props.core_id = ++ (n / ms->smp.threads) % ms->smp.cores; + ms->possible_cpus->cpus[n].props.has_thread_id = true; +- ms->possible_cpus->cpus[n].props.thread_id = n; ++ ms->possible_cpus->cpus[n].props.thread_id = ++ n % ms->smp.threads; + } + return ms->possible_cpus; + } +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch b/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch
new file mode 100644 index 0000000..6b60b70 --- /dev/null +++ b/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch @@ -0,0 +1,88 @@ +From 14e49ad3b98f01c1ad6fe456469d40a96a43dc3c Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 05/16] hw/arm/virt: Fix CPU's default NUMA node ID + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [5/6] 5336f62bc0c53c0417db1d71ef89544907bc28c0 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +When CPU-to-NUMA association isn't explicitly provided by users, +the default one is given by mc->get_default_cpu_node_id(). However, +the CPU topology isn't fully considered in the default association +and this causes CPU topology broken warnings on booting Linux guest. + +For example, the following warning messages are observed when the +Linux guest is booted with the following command lines. 
+ +/home/gavin/sandbox/qemu.main/build/qemu-system-aarch64 \ +-accel kvm -machine virt,gic-version=host \ +-cpu host \ +-smp 6,sockets=2,cores=3,threads=1 \ +-m 1024M,slots=16,maxmem=64G \ +-object memory-backend-ram,id=mem0,size=128M \ +-object memory-backend-ram,id=mem1,size=128M \ +-object memory-backend-ram,id=mem2,size=128M \ +-object memory-backend-ram,id=mem3,size=128M \ +-object memory-backend-ram,id=mem4,size=128M \ +-object memory-backend-ram,id=mem5,size=384M \ +-numa node,nodeid=0,memdev=mem0 \ +-numa node,nodeid=1,memdev=mem1 \ +-numa node,nodeid=2,memdev=mem2 \ +-numa node,nodeid=3,memdev=mem3 \ +-numa node,nodeid=4,memdev=mem4 \ +-numa node,nodeid=5,memdev=mem5 +: +alternatives: patching kernel code +BUG: arch topology borken +the CLS domain not a subset of the MC domain + +BUG: arch topology borken +the DIE domain not a subset of the NODE domain + +With current implementation of mc->get_default_cpu_node_id(), +CPU#0 to CPU#5 are associated with NODE#0 to NODE#5 separately. +That's incorrect because CPU#0/1/2 should be associated with the same +NUMA node because they're seated in the same socket. + +This fixes the issue by considering the socket ID when the default +CPU-to-NUMA association is provided in virt_possible_cpu_arch_ids(). +With this applied, no more CPU topology broken warnings are seen +from the Linux guest. The 6 CPUs are associated with NODE#0/1, but +there are no CPUs associated with NODE#2/3/4/5.
+ +Signed-off-by: Gavin Shan +Reviewed-by: Igor Mammedov +Reviewed-by: Yanan Wang +Message-id: 20220503140304.855514-6-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 4c18bc192386dfbca530e7f550e0992df657818a) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index a87c8d396a..95d012d6eb 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2545,7 +2545,9 @@ virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + + static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx) + { +- return idx % ms->numa_state->num_nodes; ++ int64_t socket_id = ms->possible_cpus->cpus[idx].props.socket_id; ++ ++ return socket_id % ms->numa_state->num_nodes; + } + + static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch b/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch new file mode 100644 index 0000000..78b9ee0 --- /dev/null +++ b/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch @@ -0,0 +1,56 @@ +From e25c40735d2f022c07481b548d20476222006657 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 4 May 2022 11:11:54 +0200 +Subject: [PATCH 2/5] hw/arm/virt: Fix missing initialization in + instance/class_init() + +RH-Author: Eric Auger +RH-MergeRequest: 82: hw/arm/virt: Remove the dtb-kaslr-seed machine option +RH-Commit: [2/2] 22cbbfc30cf57a09b8acfb25d8a4dff2754c630c (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2046029 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2046029 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45133161 +Upstream Status: RHEL-only +Tested: Boot RHEL guest and check migration from 8.6 to 9.1 + (with custom additions) + +During the 7.0 rebase, the initialization of 
highmem_mmio and +highmem_redists was forgotten in rhel_virt_instance_init(). +Fix it to match virt_instance_init() code. + +Also mc->smp_props.clusters_supported was missing in +rhel_machine_class_init(). + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index bde4f77994..8be12e121d 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3286,6 +3286,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + hc->unplug_request = virt_machine_device_unplug_request_cb; + hc->unplug = virt_machine_device_unplug_cb; + mc->nvdimm_supported = true; ++ mc->smp_props.clusters_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; + mc->default_ram_id = "mach-virt.ram"; +@@ -3366,6 +3367,8 @@ static void rhel_virt_instance_init(Object *obj) + vms->gic_version = VIRT_GIC_VERSION_NOSEL; + + vms->highmem_ecam = !vmc->no_highmem_ecam; ++ vms->highmem_mmio = true; ++ vms->highmem_redists = true; + + if (vmc->no_its) { + vms->its = false; +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch b/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch new file mode 100644 index 0000000..734756d --- /dev/null +++ b/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch @@ -0,0 +1,78 @@ +From 8d5b57798d079307a98f6be5e1f6d28d1937a2fe Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 15:50:44 +0100 +Subject: [PATCH 1/6] hw/arm/virt: Register "iommu" as a class property + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [1/5] 74b01bb90213493db700d5bdf81dd99892571972 +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Register the "iommu" option as a class property. 
This mirrors what +was done in upstream commit b91def7b ("arm/virt: Register +most properties as class properties"). + +While we are at it we also move the "x-oem-id" and "x-oem-table-id" +registrations at the very end of the rhel_machine_class_init() +function. This makes our life easier when comparing with upstream. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e8941afd01..684ffce52e 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3131,6 +3131,18 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set GIC version. " + "Valid values are 2, 3, host and max"); + ++ object_class_property_add_str(oc, "iommu", virt_get_iommu, virt_set_iommu); ++ object_class_property_set_description(oc, "iommu", ++ "Set the IOMMU type. " ++ "Valid values are none and smmuv3"); ++ ++ object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ virt_get_default_bus_bypass_iommu, ++ virt_set_default_bus_bypass_iommu); ++ object_class_property_set_description(oc, "default_bus_bypass_iommu", ++ "Set on/off to enable/disable " ++ "bypass_iommu for default root bus"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); +@@ -3146,10 +3158,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Override the default value of field OEM Table ID " + "in ACPI table header." 
+ "The string may be up to 8 bytes in size"); +- object_class_property_add_bool(oc, "default_bus_bypass_iommu", +- virt_get_default_bus_bypass_iommu, +- virt_set_default_bus_bypass_iommu); +- + } + + static void rhel_virt_instance_init(Object *obj) +@@ -3183,10 +3191,6 @@ static void rhel_virt_instance_init(Object *obj) + + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; +- object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu); +- object_property_set_description(obj, "iommu", +- "Set the IOMMU type. " +- "Valid values are none and smmuv3"); + + /* Default disallows RAS instantiation and is non-configurable for RHEL */ + vms->ras = false; +-- +2.27.0 + diff --git a/kvm-hw-arm-virt-Register-its-as-a-class-property.patch b/kvm-hw-arm-virt-Register-its-as-a-class-property.patch new file mode 100644 index 0000000..91b353a --- /dev/null +++ b/kvm-hw-arm-virt-Register-its-as-a-class-property.patch @@ -0,0 +1,57 @@ +From 07e2094cd86c1be349c0bdda69acd1857afacb66 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 16:04:59 +0100 +Subject: [PATCH 2/6] hw/arm/virt: Register "its" as a class property + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [2/5] 4ddfa57495578127770f93689c4d9f111a12b91c +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Register "its" as a class property. This mirrors what was done +in commit 27edeeaafe43 ("virt: Register "its" as class property"). 
+ +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 684ffce52e..d679391eb0 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3143,6 +3143,12 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set on/off to enable/disable " + "bypass_iommu for default root bus"); + ++ object_class_property_add_bool(oc, "its", virt_get_its, ++ virt_set_its); ++ object_class_property_set_description(oc, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); +@@ -3182,11 +3188,6 @@ static void rhel_virt_instance_init(Object *obj) + } else { + /* Default allows ITS instantiation */ + vms->its = true; +- object_property_add_bool(obj, "its", virt_get_its, +- virt_set_its); +- object_property_set_description(obj, "its", +- "Set on/off to enable/disable " +- "ITS instantiation"); + } + + /* Default disallows iommu instantiation */ +-- +2.27.0 + diff --git a/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch b/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch new file mode 100644 index 0000000..10af6c0 --- /dev/null +++ b/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch @@ -0,0 +1,76 @@ +From 69f771c3dc641431f3e98497cbd3832edb69284f Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 3 May 2022 08:56:52 +0200 +Subject: [PATCH 1/5] hw/arm/virt: Remove the dtb-kaslr-seed machine option + +RH-Author: Eric Auger +RH-MergeRequest: 82: hw/arm/virt: Remove the dtb-kaslr-seed machine option +RH-Commit: [1/2] a89dcd7f22e04ae39de99795d3f34cdd0b831bc0 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2046029 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2046029 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45133161 
+Upstream Status: RHEL-only +Tested: Boot RHEL guest and check the option is not available + +In RHEL we do not want to expose the dtb-kaslr-seed virt machine +option. Indeed the default 'on' value matches our need as +random data in the DTB does not cause any boot failure and we +want to support KASLR for the guest. + +Signed-off-by: Eric Auger + +--- +--- + hw/arm/virt.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e06862d22a..bde4f77994 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2350,6 +2350,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) + vms->its = value; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_dtb_kaslr_seed(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2363,6 +2364,7 @@ static void virt_set_dtb_kaslr_seed(Object *obj, bool value, Error **errp) + + vms->dtb_kaslr_seed = value; + } ++#endif /* disabled for RHEL */ + + static char *virt_get_oem_id(Object *obj, Error **errp) + { +@@ -3346,13 +3348,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Override the default value of field OEM Table ID " + "in ACPI table header." 
+ "The string may be up to 8 bytes in size"); +- +- object_class_property_add_bool(oc, "dtb-kaslr-seed", +- virt_get_dtb_kaslr_seed, +- virt_set_dtb_kaslr_seed); +- object_class_property_set_description(oc, "dtb-kaslr-seed", +- "Set off to disable passing of kaslr-seed " +- "dtb node to guest"); + } + + static void rhel_virt_instance_init(Object *obj) +@@ -3397,7 +3392,7 @@ static void rhel_virt_instance_init(Object *obj) + /* MTE is disabled by default and non-configurable for RHEL */ + vms->mte = false; + +- /* Supply a kaslr-seed by default */ ++ /* Supply a kaslr-seed by default and non-configurable for RHEL */ + vms->dtb_kaslr_seed = true; + + vms->irqmap = a15irqmap; +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch b/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch new file mode 100644 index 0000000..25e20ea --- /dev/null +++ b/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch @@ -0,0 +1,46 @@ +From e896ba2bfbb613576ec3fbe5b948a326ac06193d Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 15:58:38 +0100 +Subject: [PATCH 3/6] hw/arm/virt: Rename default_bus_bypass_iommu + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [3/5] 3ed0425391dab7cf14c6e66fc1b2430be1152d6c +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Rename "default_bus_bypass_iommu" into "default-bus-bypass-iommu". +This mirrors what was done in upstream commit: +9dad363a223 ("hw/arm/virt: Rename default_bus_bypass_iommu") + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index d679391eb0..6a4173b6c3 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3136,10 +3136,10 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set the IOMMU type. 
" + "Valid values are none and smmuv3"); + +- object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ object_class_property_add_bool(oc, "default-bus-bypass-iommu", + virt_get_default_bus_bypass_iommu, + virt_set_default_bus_bypass_iommu); +- object_class_property_set_description(oc, "default_bus_bypass_iommu", ++ object_class_property_set_description(oc, "default-bus-bypass-iommu", + "Set on/off to enable/disable " + "bypass_iommu for default root bus"); + +-- +2.27.0 + diff --git a/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch b/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch new file mode 100644 index 0000000..1bdad27 --- /dev/null +++ b/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch @@ -0,0 +1,96 @@ +From 6ee4a8718dcce2d6da43ee200534b75baf1d7bbe Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 18 Nov 2021 12:57:32 +0100 +Subject: [PATCH 16/17] hw/block/fdc: Prevent end-of-track overrun + (CVE-2021-3507) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 107: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) +RH-Commit: [1/2] 9ffc5290348884d20b894fa79f4d0c8089247f8b (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1951522 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Miroslav Rezanina + +Per the 82078 datasheet, if the end-of-track (EOT byte in +the FIFO) is more than the number of sectors per side, the +command is terminated unsuccessfully: + +* 5.2.5 DATA TRANSFER TERMINATION + + The 82078 supports terminal count explicitly through + the TC pin and implicitly through the underrun/over- + run and end-of-track (EOT) functions. For full sector + transfers, the EOT parameter can define the last + sector to be transferred in a single or multisector + transfer. 
If the last sector to be transferred is a par- + tial sector, the host can stop transferring the data in + mid-sector, and the 82078 will continue to complete + the sector as if a hardware TC was received. The + only difference between these implicit functions and + TC is that they return "abnormal termination" result + status. Such status indications can be ignored if they + were expected. + +* 6.1.3 READ TRACK + + This command terminates when the EOT specified + number of sectors have been read. If the 82078 + does not find an ID Address Mark on the diskette + after the second occurrence of a pulse on the + INDX# pin, then it sets the IC code in Status Regis- + ter 0 to "01" (Abnormal termination), sets the MA bit + in Status Register 1 to "1", and terminates the com- + mand. + +* 6.1.6 VERIFY + + Refer to Table 6-6 and Table 6-7 for information + concerning the values of MT and EC versus SC and + EOT value. + +* Table 6-6. Result Phase Table + +* Table 6-7. Verify Command Result Phase Table + +Fix by aborting the transfer when EOT > # Sectors Per Side. + +Cc: qemu-stable@nongnu.org +Cc: Hervé Poussineau +Fixes: baca51faff0 ("floppy driver: disk geometry auto detect") +Reported-by: Alexander Bulekov +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/339 +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20211118115733.4038610-2-philmd@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit defac5e2fbddf8423a354ff0454283a2115e1367) +Signed-off-by: Jon Maloy +--- + hw/block/fdc.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index ca1776121f..6481ec0cfb 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -1532,6 +1532,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction) + int tmp; + fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 
7 : fdctrl->fifo[5]); + tmp = (fdctrl->fifo[6] - ks + 1); ++ if (tmp < 0) { ++ FLOPPY_DPRINTF("invalid EOT: %d\n", tmp); ++ fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00); ++ fdctrl->fifo[3] = kt; ++ fdctrl->fifo[4] = kh; ++ fdctrl->fifo[5] = ks; ++ return; ++ } + if (fdctrl->fifo[0] & 0x80) + tmp += fdctrl->fifo[6]; + fdctrl->data_len *= tmp; +-- +2.31.1 + diff --git a/kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch b/kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch new file mode 100644 index 0000000..eea6fa2 --- /dev/null +++ b/kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch @@ -0,0 +1,75 @@ +From 2db3d0de1be018f14cb91fdd4a368996b09d8bec Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 14:51:06 -0400 +Subject: [PATCH 1/3] hw/intc/arm_gicv3: Check for !MEMTX_OK instead of + MEMTX_ERROR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 151: hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR +RH-Commit: [1/3] 561c9c2b1249f07d33013040b1c495ed1fbf825b (jmaloy/qemu-kvm) +RH-Bugzilla: 1999236 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236 +Upstream: Merged +CVE: CVE-2021-3750 + +commit b9d383ab797f54ae5fa8746117770709921dc529 +Author: Philippe Mathieu-Daudé +Date: Wed Dec 15 19:24:19 2021 +0100 + + hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR + + Quoting Peter Maydell: + + "These MEMTX_* aren't from the memory transaction + API functions; they're just being used by gicd_readl() and + friends as a way to indicate a success/failure so that the + actual MemoryRegionOps read/write fns like gicv3_dist_read() + can log a guest error." + + We are going to introduce more MemTxResult bits, so it is + safer to check for !MEMTX_OK rather than MEMTX_ERROR. 
+ + Reviewed-by: Peter Xu + Reviewed-by: David Hildenbrand + Reviewed-by: Peter Maydell + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Philippe Mathieu-Daudé + Signed-off-by: Peter Maydell + +(cherry picked from commit b9d383ab797f54ae5fa8746117770709921dc529) +Signed-off-by: Jon Maloy +--- + hw/intc/arm_gicv3_redist.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c +index c8ff3eca08..99b11ca5ee 100644 +--- a/hw/intc/arm_gicv3_redist.c ++++ b/hw/intc/arm_gicv3_redist.c +@@ -462,7 +462,7 @@ MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data, + break; + } + +- if (r == MEMTX_ERROR) { ++ if (r != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest read at offset " TARGET_FMT_plx + " size %u\n", __func__, offset, size); +@@ -521,7 +521,7 @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, + break; + } + +- if (r == MEMTX_ERROR) { ++ if (r != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write at offset " TARGET_FMT_plx + " size %u\n", __func__, offset, size); +-- +2.27.0 + diff --git a/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch b/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch new file mode 100644 index 0000000..44897ac --- /dev/null +++ b/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch @@ -0,0 +1,95 @@ +From 4dad0e9abbc843fba4e5fee6e7aa1b0db13f5898 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:27:35 +0200 +Subject: [PATCH 03/32] hw/virtio: Replace g_memdup() by g_memdup2() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [3/27] ae196903eb1a7aebbf999100e997cf82e5024cb6 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + 
+Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit d792199de55ca5cb5334016884039c740290b5c7 +Author: Philippe Mathieu-Daudé +Date: Thu May 12 19:57:46 2022 +0200 + + hw/virtio: Replace g_memdup() by g_memdup2() + + Per https://discourse.gnome.org/t/port-your-module-from-g-memdup-to-g-memdup2-now/5538 + + The old API took the size of the memory to duplicate as a guint, + whereas most memory functions take memory sizes as a gsize. This + made it easy to accidentally pass a gsize to g_memdup(). For large + values, that would lead to a silent truncation of the size from 64 + to 32 bits, and result in a heap area being returned which is + significantly smaller than what the caller expects. This can likely + be exploited in various modules to cause a heap buffer overflow. + + Replace g_memdup() by the safer g_memdup2() wrapper. + + Acked-by: Jason Wang + Acked-by: Eugenio Pérez + Signed-off-by: Philippe Mathieu-Daudé + Message-Id: <20220512175747.142058-6-eperezma@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +Signed-off-by: Eugenio Pérez +--- + hw/net/virtio-net.c | 3 ++- + hw/virtio/virtio-crypto.c | 6 +++--- + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 099e65036d..633de61513 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1458,7 +1458,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) + } + + iov_cnt = elem->out_num; +- iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num); ++ iov2 = iov = g_memdup2(elem->out_sg, ++ sizeof(struct iovec) * elem->out_num); + s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); + iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); + if (s != sizeof(ctrl)) { +diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c +index dcd80b904d..0e31e3cc04 100644 +--- a/hw/virtio/virtio-crypto.c ++++ b/hw/virtio/virtio-crypto.c +@@ -242,7 +242,7 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) + } + + out_num = elem->out_num; +- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num); ++ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num); + out_iov = out_iov_copy; + + in_num = elem->in_num; +@@ -605,11 +605,11 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) + } + + out_num = elem->out_num; +- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num); ++ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num); + out_iov = out_iov_copy; + + in_num = elem->in_num; +- in_iov_copy = g_memdup(elem->in_sg, sizeof(in_iov[0]) * in_num); ++ in_iov_copy = g_memdup2(elem->in_sg, sizeof(in_iov[0]) * in_num); + in_iov = in_iov_copy; + + if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req)) +-- +2.31.1 + diff --git a/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch b/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch new file mode 100644 index 0000000..bb42634 --- /dev/null +++ 
b/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch @@ -0,0 +1,66 @@ +From f0115d856f46e65e3b62896f84fe1902a958bf79 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 22 Mar 2022 19:23:36 -0400 +Subject: [PATCH 04/18] hw/virtio: vdpa: Fix leak of host-notifier + memory-region + +RH-Author: Jon Maloy +RH-MergeRequest: 132: hw/virtio: vdpa: Fix leak of host-notifier memory-region +RH-Commit: [1/1] b3cec35d185e3b9844a458f5c51c5d5ef7e3d8f1 (jmaloy/qemu-kvm) +RH-Bugzilla: 2060843 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Laurent Vivier +RH-Acked-by: Igor Mammedov + +BZ: https://bugzilla.redhat.com/2060843 +UPSTREAM: no +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038138 + +commit 98f7607ecda00dea3cbb2ed7b4427c96846efb83 +Author: Laurent Vivier +Date: Fri Feb 11 18:02:59 2022 +0100 + + hw/virtio: vdpa: Fix leak of host-notifier memory-region + + If the call to virtio_queue_set_host_notifier_mr fails, the + host-notifier memory-region should be freed. + + This problem can trigger a coredump with some vDPA drivers (mlx5, + but not with the vdpasim), if we unplug the virtio-net card from + the guest after a stop/start. + + The same fix has been done for vhost-user: + 1f89d3b91e3e ("hw/virtio: Fix leak of host-notifier memory-region") + + Fixes: d0416d487bd5 ("vhost-vdpa: map virtqueue notification area if possible") + Cc: jasowang@redhat.com + Resolves: https://bugzilla.redhat.com/2027208 + Signed-off-by: Laurent Vivier + Message-Id: <20220211170259.1388734-1-lvivier@redhat.com> + Cc: qemu-stable@nongnu.org + Acked-by: Jason Wang + Reviewed-by: Stefano Garzarella + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit 98f7607ecda00dea3cbb2ed7b4427c96846efb83) +Signed-off-by: Jon Maloy +--- + hw/virtio/vhost-vdpa.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index bcaf00e09f..78da48a333 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -415,6 +415,7 @@ static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index) + g_free(name); + + if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) { ++ object_unparent(OBJECT(&n->mr)); + munmap(addr, page_size); + goto err; + } +-- +2.27.0 + diff --git a/kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch b/kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch new file mode 100644 index 0000000..540f721 --- /dev/null +++ b/kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch @@ -0,0 +1,59 @@ +From ccaa1135bd1aa90c94f0e8b5417bd2a420134e6c Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 08/18] i386: Add Icelake-Server-v6 CPU model with 5-level EPT + support + +RH-Author: Jon Maloy +RH-MergeRequest: 139: vmxcap: Add 5-level EPT bit +RH-Commit: [2/2] e913746b2df9cbd0308014ab5cc72577458857fa (jmaloy/qemu-kvm) +RH-Bugzilla: 2065207 +RH-Acked-by: Paolo Bonzini + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2065207 +UPSTREAM: Merged + +commit: 12cab535db6440af41ed8dfefe908a594321b6ce +Author: Vitaly Kuznetsov +Date: Mon Feb 21 15:53:15 2022 +0100 + + i386: Add Icelake-Server-v6 CPU model with 5-level EPT support + + Windows 11 with WSL2 enabled (Hyper-V) fails to boot with Icelake-Server + {-v5} CPU model but boots well with '-cpu host'. Apparently, it expects + 5-level paging and 5-level EPT support to come as a pair but QEMU's + Icelake-Server CPU model lacks the latter. Introduce 'Icelake-Server-v6' + CPU model with 'vmx-page-walk-5' enabled by default. 
+ + Signed-off-by: Vitaly Kuznetsov + Message-Id: <20220221145316.576138-1-vkuznets@redhat.com> + Signed-off-by: Paolo Bonzini + +(cherry picked from commit 12cab535db6440af41ed8dfefe908a594321b6ce) +Signed-off-by: Jon Maloy +--- + target/i386/cpu.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index aa9e636800..6e25d13339 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3505,6 +3505,14 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + }, + }, ++ { ++ .version = 6, ++ .note = "5-level EPT", ++ .props = (PropValue[]) { ++ { "vmx-page-walk-5", "on" }, ++ { /* end of list */ } ++ }, ++ }, + { /* end of list */ } + } + }, +-- +2.27.0 + diff --git a/kvm-iotests-108-Fix-when-missing-user_allow_other.patch b/kvm-iotests-108-Fix-when-missing-user_allow_other.patch new file mode 100644 index 0000000..a37ea6f --- /dev/null +++ b/kvm-iotests-108-Fix-when-missing-user_allow_other.patch @@ -0,0 +1,52 @@ +From 447bca651c9156d7aba6b7495c75f19b5e4ed53f Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Thu, 21 Apr 2022 16:24:35 +0200 +Subject: [PATCH 07/16] iotests/108: Fix when missing user_allow_other + +RH-Author: Hanna Reitz +RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding +RH-Commit: [4/4] a51ab8606fc9d8dea2b6539f4e795d5813892a5c (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2072379 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +FUSE exports' allow-other option defaults to "auto", which means that it +will try passing allow_other as a mount option, and fall back to not +using it when an error occurs. We make no effort to hide fusermount's +error message (because it would be difficult, and because users might +want to know about the fallback occurring), and so when allow_other does +not work (primarily when /etc/fuse.conf does not contain +user_allow_other), this error message will appear and break the +reference output. 
+ +We do not need allow_other here, though, so we can just pass +allow-other=off to fix that. + +Reported-by: Markus Armbruster +Signed-off-by: Hanna Reitz +Message-Id: <20220421142435.569600-1-hreitz@redhat.com> +Tested-by: Markus Armbruster +Tested-by: Eric Blake +(cherry picked from commit 348a0740afc5b313599533eb69bbb2b95d2f1bba) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/108 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 +index a3090e2875..4681c7c769 100755 +--- a/tests/qemu-iotests/108 ++++ b/tests/qemu-iotests/108 +@@ -326,7 +326,7 @@ else + + $QSD \ + --blockdev file,node-name=export-node,filename="$TEST_IMG" \ +- --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ ++ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off,allow-other=off \ + --pidfile "$TEST_DIR/qsd.pid" \ + & + +-- +2.31.1 + diff --git a/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch b/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch new file mode 100644 index 0000000..7a968f6 --- /dev/null +++ b/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch @@ -0,0 +1,445 @@ +From ed69e01352b5e9a06173daab53bfa373c8535732 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:51 +0200 +Subject: [PATCH 05/16] iotests/108: Test new refcount rebuild algorithm + +RH-Author: Hanna Reitz +RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding +RH-Commit: [2/4] b68310a9fee8465dd3f568c8e867e1b7ae52bdaf (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2072379 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +One clear problem with how qcow2's refcount structure rebuild algorithm +used to be before "qcow2: Improve refcount structure rebuilding" was +that it is prone to failure for qcow2 images on block devices: There is +generally unused space after the 
actual image, and if that exceeds what +one refblock covers, the old algorithm would invariably write the +reftable past the block device's end, which cannot work. The new +algorithm does not have this problem. + +Test it with three tests: +(1) Create an image with more empty space at the end than what one + refblock covers, see whether rebuilding the refcount structures + results in a change in the image file length. (It should not.) + +(2) Leave precisely enough space somewhere at the beginning of the image + for the new reftable (and the refblock for that place), see whether + the new algorithm puts the reftable there. (It should.) + +(3) Test the original problem: Create (something like) a block device + with a fixed size, then create a qcow2 image in there, write some + data, and then have qemu-img check rebuild the refcount structures. + Before HEAD^, the reftable would have been written past the image + file end, i.e. outside of what the block device provides, which + cannot work. HEAD^ should have fixed that. + ("Something like a block device" means a loop device if we can use + one ("sudo -n losetup" works), or a FUSE block export with + growable=false otherwise.) + +Reviewed-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-3-hreitz@redhat.com> +(cherry picked from commit 9ffd6d646d1d5ee9087a8cbf0b7d2f96c5656162) + +Conflicts: +- 108: The downstream qemu-storage-daemon does not support --daemonize, + so this switch has been replaced by a loop waiting for the PID file to + appear + +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/108 | 263 ++++++++++++++++++++++++++++++++++++- + tests/qemu-iotests/108.out | 81 ++++++++++++ + 2 files changed, 343 insertions(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 +index 56339ab2c5..a3090e2875 100755 +--- a/tests/qemu-iotests/108 ++++ b/tests/qemu-iotests/108 +@@ -30,13 +30,20 @@ status=1 # failure is the default! 
+ + _cleanup() + { +- _cleanup_test_img ++ _cleanup_test_img ++ if [ -f "$TEST_DIR/qsd.pid" ]; then ++ qsd_pid=$(cat "$TEST_DIR/qsd.pid") ++ kill -KILL "$qsd_pid" ++ fusermount -u "$TEST_DIR/fuse-export" &>/dev/null ++ fi ++ rm -f "$TEST_DIR/fuse-export" + } + trap "_cleanup; exit \$status" 0 1 2 3 15 + + # get standard environment, filters and checks + . ./common.rc + . ./common.filter ++. ./common.qemu + + # This tests qcow2-specific low-level functionality + _supported_fmt qcow2 +@@ -47,6 +54,22 @@ _supported_os Linux + # files + _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file + ++# This test either needs sudo -n losetup or FUSE exports to work ++if sudo -n losetup &>/dev/null; then ++ loopdev=true ++else ++ loopdev=false ++ ++ # QSD --export fuse will either yield "Parameter 'id' is missing" ++ # or "Invalid parameter 'fuse'", depending on whether there is ++ # FUSE support or not. ++ error=$($QSD --export fuse 2>&1) ++ if [[ $error = *"'fuse'"* ]]; then ++ _notrun 'Passwordless sudo for losetup or FUSE support required, but' \ ++ 'neither is available' ++ fi ++fi ++ + echo + echo '=== Repairing an image without any refcount table ===' + echo +@@ -138,6 +161,244 @@ _make_test_img 64M + poke_file "$TEST_IMG" $((0x10008)) "\xff\xff\xff\xff\xff\xff\x00\x00" + _check_test_img -r all + ++echo ++echo '=== Check rebuilt reftable location ===' ++ ++# In an earlier version of the refcount rebuild algorithm, the ++# reftable was generally placed at the image end (unless something was ++# allocated in the area covered by the refblock right before the image ++# file end, then we would try to place the reftable in that refblock). ++# This was later changed so the reftable would be placed in the ++# earliest possible location. Test this. ++ ++echo ++echo '--- Does the image size increase? 
---' ++echo ++ ++# First test: Just create some image, write some data to it, and ++# resize it so there is free space at the end of the image (enough ++# that it spans at least one full refblock, which for cluster_size=512 ++# images, spans 128k). With the old algorithm, the reftable would ++# have then been placed at the end of the image file, but with the new ++# one, it will be put in that free space. ++# We want to check whether the size of the image file increases due to ++# rebuilding the refcount structures (it should not). ++ ++_make_test_img -o 'cluster_size=512' 1M ++# Write something ++$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io ++ ++# Add free space ++file_len=$(stat -c '%s' "$TEST_IMG") ++truncate -s $((file_len + 256 * 1024)) "$TEST_IMG" ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) ++poke_file "$TEST_IMG" $rb_offset "\x00\x00" ++ ++# Check whether rebuilding the refcount structures increases the image ++# file size ++file_len=$(stat -c '%s' "$TEST_IMG") ++echo ++# The only leaks there can be are the old refcount structures that are ++# leaked during rebuilding, no need to clutter the output with them ++_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' ++echo ++post_repair_file_len=$(stat -c '%s' "$TEST_IMG") ++ ++if [[ $file_len -eq $post_repair_file_len ]]; then ++ echo 'OK: Image size did not change' ++else ++ echo 'ERROR: Image size differs' \ ++ "($file_len before, $post_repair_file_len after)" ++fi ++ ++echo ++echo '--- Will the reftable occupy a hole specifically left for it? ---' ++echo ++ ++# Note: With cluster_size=512, every refblock covers 128k. ++# The reftable covers 8M per reftable cluster. ++ ++# Create an image that requires two reftable clusters (just because ++# this is more interesting than a single-clustered reftable). 
++_make_test_img -o 'cluster_size=512' 9M ++$QEMU_IO -c 'write 0 8M' "$TEST_IMG" | _filter_qemu_io ++ ++# Writing 8M will have resized the reftable. Unfortunately, doing so ++# will leave holes in the file, so we need to fill them up so we can ++# be sure the whole file is allocated. Do that by writing ++# consecutively smaller chunks starting from 8 MB, until the file ++# length increases even with a chunk size of 512. Then we must have ++# filled all holes. ++ofs=$((8 * 1024 * 1024)) ++block_len=$((16 * 1024)) ++while [[ $block_len -ge 512 ]]; do ++ file_len=$(stat -c '%s' "$TEST_IMG") ++ while [[ $(stat -c '%s' "$TEST_IMG") -eq $file_len ]]; do ++ # Do not include this in the reference output, it does not ++ # really matter which qemu-io calls we do here exactly ++ $QEMU_IO -c "write $ofs $block_len" "$TEST_IMG" >/dev/null ++ ofs=$((ofs + block_len)) ++ done ++ block_len=$((block_len / 2)) ++done ++ ++# Fill up to 9M (do not include this in the reference output either, ++# $ofs is random for all we know) ++$QEMU_IO -c "write $ofs $((9 * 1024 * 1024 - ofs))" "$TEST_IMG" >/dev/null ++ ++# Make space as follows: ++# - For the first refblock: Right at the beginning of the image (this ++# refblock is placed in the first place possible), ++# - For the reftable somewhere soon afterwards, still near the ++# beginning of the image (i.e. covered by the first refblock); the ++# reftable too is placed in the first place possible, but only after ++# all refblocks have been placed) ++# No space is needed for the other refblocks, because no refblock is ++# put before the space it covers. In this test case, we do not mind ++# if they are placed at the image file's end. ++ ++# Before we make that space, we have to find out the host offset of ++# the area that belonged to the two data clusters at guest offset 4k, ++# because we expect the reftable to be placed there, and we will have ++# to verify that it is. 
++ ++l1_offset=$(peek_file_be "$TEST_IMG" 40 8) ++l2_offset=$(peek_file_be "$TEST_IMG" $l1_offset 8) ++l2_offset=$((l2_offset & 0x00fffffffffffe00)) ++data_4k_offset=$(peek_file_be "$TEST_IMG" \ ++ $((l2_offset + 4096 / 512 * 8)) 8) ++data_4k_offset=$((data_4k_offset & 0x00fffffffffffe00)) ++ ++$QEMU_IO -c "discard 0 512" -c "discard 4k 1k" "$TEST_IMG" | _filter_qemu_io ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) ++poke_file "$TEST_IMG" $rb_offset "\x00\x00" ++ ++echo ++# The only leaks there can be are the old refcount structures that are ++# leaked during rebuilding, no need to clutter the output with them ++_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' ++echo ++ ++# Check whether the reftable was put where we expected ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++if [[ $rt_offset -eq $data_4k_offset ]]; then ++ echo 'OK: Reftable is where we expect it' ++else ++ echo "ERROR: Reftable is at $rt_offset, but was expected at $data_4k_offset" ++fi ++ ++echo ++echo '--- Rebuilding refcount structures on block devices ---' ++echo ++ ++# A block device cannot really grow, at least not during qemu-img ++# check. As mentioned in the above cases, rebuilding the refcount ++# structure may lead to new refcount structures being written after ++# the end of the image, and in the past that happened even if there ++# was more than sufficient space in the image. Such post-EOF writes ++# will not work on block devices, so test that the new algorithm ++# avoids it. ++ ++# If we have passwordless sudo and losetup, we can use those to create ++# a block device. Otherwise, we can resort to qemu's FUSE export to ++# create a file that isn't growable, which effectively tests the same ++# thing. 
++ ++_cleanup_test_img ++truncate -s $((64 * 1024 * 1024)) "$TEST_IMG" ++ ++if $loopdev; then ++ export_mp=$(sudo -n losetup --show -f "$TEST_IMG") ++ export_mp_driver=host_device ++ sudo -n chmod go+rw "$export_mp" ++else ++ # Create non-growable FUSE export that is a bit like an empty ++ # block device ++ export_mp="$TEST_DIR/fuse-export" ++ export_mp_driver=file ++ touch "$export_mp" ++ ++ $QSD \ ++ --blockdev file,node-name=export-node,filename="$TEST_IMG" \ ++ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ ++ --pidfile "$TEST_DIR/qsd.pid" \ ++ & ++ ++ while [ ! -f "$TEST_DIR/qsd.pid" ]; do ++ sleep 0.1 ++ done ++fi ++ ++# Now create a qcow2 image on the device -- unfortunately, qemu-img ++# create force-creates the file, so we have to resort to the ++# blockdev-create job. ++_launch_qemu \ ++ --blockdev $export_mp_driver,node-name=file,filename="$export_mp" ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "qmp_capabilities" }' \ ++ 'return' ++ ++# Small cluster size again, so the image needs multiple refblocks ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "blockdev-create", ++ "arguments": { ++ "job-id": "create", ++ "options": { ++ "driver": "qcow2", ++ "file": "file", ++ "size": '$((64 * 1024 * 1024))', ++ "cluster-size": 512 ++ } } }' \ ++ '"concluded"' ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "job-dismiss", "arguments": { "id": "create" } }' \ ++ 'return' ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "quit" }' \ ++ 'return' ++ ++wait=y _cleanup_qemu ++echo ++ ++# Write some data ++$QEMU_IO -c 'write 0 64k' "$export_mp" | _filter_qemu_io ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$export_mp" 48 8) ++rb_offset=$(peek_file_be "$export_mp" $rt_offset 8) ++poke_file "$export_mp" $rb_offset "\x00\x00" ++ ++# Repairing such a simple case should just work ++# (We used to put the reftable at the end of the image 
file, which can ++# never work for non-growable devices.) ++echo ++TEST_IMG="$export_mp" _check_test_img -r all \ ++ | grep -v '^Repairing cluster.*refcount=1 reference=0' ++ ++if $loopdev; then ++ sudo -n losetup -d "$export_mp" ++else ++ qsd_pid=$(cat "$TEST_DIR/qsd.pid") ++ kill -TERM "$qsd_pid" ++ # Wait for process to exit (cannot `wait` because the QSD is daemonized) ++ while [ -f "$TEST_DIR/qsd.pid" ]; do ++ true ++ done ++fi ++ + # success, all done + echo '*** done' + rm -f $seq.full +diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out +index 75bab8dc84..b5401d788d 100644 +--- a/tests/qemu-iotests/108.out ++++ b/tests/qemu-iotests/108.out +@@ -105,6 +105,87 @@ The following inconsistencies were found and repaired: + 0 leaked clusters + 1 corruptions + ++Double checking the fixed image now... ++No errors were found on the image. ++ ++=== Check rebuilt reftable location === ++ ++--- Does the image size increase? --- ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++wrote 65536/65536 bytes at offset 0 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ ++Double checking the fixed image now... ++No errors were found on the image. ++ ++OK: Image size did not change ++ ++--- Will the reftable occupy a hole specifically left for it? 
--- ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=9437184 ++wrote 8388608/8388608 bytes at offset 0 ++8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++discard 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++discard 1024/1024 bytes at offset 4096 ++1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ ++Double checking the fixed image now... ++No errors were found on the image. ++ ++OK: Reftable is where we expect it ++ ++--- Rebuilding refcount structures on block devices --- ++ ++{ "execute": "qmp_capabilities" } ++{"return": {}} ++{ "execute": "blockdev-create", ++ "arguments": { ++ "job-id": "create", ++ "options": { ++ "driver": "IMGFMT", ++ "file": "file", ++ "size": 67108864, ++ "cluster-size": 512 ++ } } } ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "create"}} ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "create"}} ++{ "execute": "job-dismiss", "arguments": { "id": "create" } } ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}} ++{"return": {}} ++{ "execute": "quit" } ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++ ++wrote 65536/65536 bytes at offset 0 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ + Double checking the fixed image now... + No errors were found on the image. + *** done +-- +2.31.1 + diff --git a/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch b/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch new file mode 100644 index 0000000..b703c23 --- /dev/null +++ b/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch @@ -0,0 +1,108 @@ +From 2ed48247fd39ade97164dee3c65162b96a116f14 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:12 +0100 +Subject: [PATCH 6/6] iotests/281: Let NBD connection yield in iothread + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [6/6] a23706f34022d301eb7ffc84fc0d0a77d72b9844 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Put an NBD block device into an I/O thread, and then read data from it, +hoping that the NBD connection will yield during that read. When it +does, the coroutine must be reentered in the block device's I/O thread, +which will only happen if the NBD block driver attaches the connection's +QIOChannel to the new AioContext. It did not do that after 4ddb5d2fde +("block/nbd: drop connection_co") and prior to "block/nbd: Move s->ioc +on AioContext change", which would cause an assertion failure. + +To improve our chances of yielding, the NBD server is throttled to +reading 64 kB/s, and the NBD client reads 128 kB, so it should yield at +some point. 
+ +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 8cfbe929e8c26050f0a4580a1606a370a947d4ce) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/281 | 28 +++++++++++++++++++++++++--- + tests/qemu-iotests/281.out | 4 ++-- + 2 files changed, 27 insertions(+), 5 deletions(-) + +diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 +index 13c588be75..b2ead7f388 100755 +--- a/tests/qemu-iotests/281 ++++ b/tests/qemu-iotests/281 +@@ -253,8 +253,9 @@ class TestYieldingAndTimers(iotests.QMPTestCase): + self.create_nbd_export() + + # Simple VM with an NBD block device connected to the NBD export +- # provided by the QSD ++ # provided by the QSD, and an (initially unused) iothread + self.vm = iotests.VM() ++ self.vm.add_object('iothread,id=iothr') + self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' + + f'server.path={self.sock},export=exp,' + + 'reconnect-delay=1') +@@ -293,19 +294,40 @@ class TestYieldingAndTimers(iotests.QMPTestCase): + # thus not see the error, and so the test will pass.) + time.sleep(2) + ++ def test_yield_in_iothread(self): ++ # Move the NBD node to the I/O thread; the NBD block driver should ++ # attach the connection's QIOChannel to that thread's AioContext, too ++ result = self.vm.qmp('x-blockdev-set-iothread', ++ node_name='nbd', iothread='iothr') ++ self.assert_qmp(result, 'return', {}) ++ ++ # Do some I/O that will be throttled by the QSD, so that the network ++ # connection hopefully will yield here. When it is resumed, it must ++ # then be resumed in the I/O thread's AioContext. ++ result = self.vm.qmp('human-monitor-command', ++ command_line='qemu-io nbd "read 0 128K"') ++ self.assert_qmp(result, 'return', '') ++ + def create_nbd_export(self): + assert self.qsd is None + +- # Simple NBD export of a null-co BDS ++ # Export a throttled null-co BDS: Reads are throttled (max 64 kB/s), ++ # writes are not. 
+ self.qsd = QemuStorageDaemon( ++ '--object', ++ 'throttle-group,id=thrgr,x-bps-read=65536,x-bps-read-max=65536', ++ + '--blockdev', + 'null-co,node-name=null,read-zeroes=true', + ++ '--blockdev', ++ 'throttle,node-name=thr,file=null,throttle-group=thrgr', ++ + '--nbd-server', + f'addr.type=unix,addr.path={self.sock}', + + '--export', +- 'nbd,id=exp,node-name=null,name=exp,writable=true' ++ 'nbd,id=exp,node-name=thr,name=exp,writable=true' + ) + + def stop_nbd_export(self): +diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out +index 914e3737bd..3f8a935a08 100644 +--- a/tests/qemu-iotests/281.out ++++ b/tests/qemu-iotests/281.out +@@ -1,5 +1,5 @@ +-..... ++...... + ---------------------------------------------------------------------- +-Ran 5 tests ++Ran 6 tests + + OK +-- +2.27.0 + diff --git a/kvm-iotests-281-Test-lingering-timers.patch b/kvm-iotests-281-Test-lingering-timers.patch new file mode 100644 index 0000000..c31b413 --- /dev/null +++ b/kvm-iotests-281-Test-lingering-timers.patch @@ -0,0 +1,174 @@ +From b56684f6c1bef4fb5bf87ac5a1106d3830c05ad0 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:10 +0100 +Subject: [PATCH 4/6] iotests/281: Test lingering timers + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [4/6] aaad466941637a34224dc037bbea37d128b5676b +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Prior to "block/nbd: Delete reconnect delay timer when done" and +"block/nbd: Delete open timer when done", both of those timers would +remain scheduled even after successfully (re-)connecting to the server, +and they would not even be deleted when the BDS is deleted. + +This test constructs exactly this situation: +(1) Configure an @open-timeout, so the open timer is armed, and +(2) Configure a @reconnect-delay and trigger a reconnect situation + (which succeeds immediately), so the reconnect delay timer is armed. 
+Then we immediately delete the BDS, and sleep for longer than the +@open-timeout and @reconnect-delay. Prior to said patches, this caused +one (or both) of the timer CBs to access already-freed data. + +Accessing freed data may or may not crash, so this test can produce +false successes, but I do not know how to show the problem in a better +or more reliable way. If you run this test on "block/nbd: Assert there +are no timers when closed" and without the fix patches mentioned above, +you should reliably see an assertion failure. +(But all other tests that use the reconnect delay timer (264 and 277) +will fail in that configuration, too; as will nbd-reconnect-on-open, +which uses the open timer.) + +Remove this test from the quick group because of the two second sleep +this patch introduces. + +(I decided to put this test case into 281, because the main bug this +series addresses is in the interaction of the NBD block driver and I/O +threads, which is precisely the scope of 281. The test case for that +other bug will also be put into the test class added here. + +Also, excuse the test class's name, I couldn't come up with anything +better. The "yield" part will make sense two patches from now.) + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit eaf1e85d4ddefdbd197f393fa9c5acc7ba8133b0) + +Conflict: +- @open-timeout was introduced after the 6.2 release, and has not been + backported. Consequently, there is no open_timer, and we can (and + must) drop the respective parts of the test here. 
+ +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/281 | 73 ++++++++++++++++++++++++++++++++++++-- + tests/qemu-iotests/281.out | 4 +-- + 2 files changed, 73 insertions(+), 4 deletions(-) + +diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 +index 956698083f..13c588be75 100755 +--- a/tests/qemu-iotests/281 ++++ b/tests/qemu-iotests/281 +@@ -1,5 +1,5 @@ + #!/usr/bin/env python3 +-# group: rw quick ++# group: rw + # + # Test cases for blockdev + IOThread interactions + # +@@ -20,8 +20,9 @@ + # + + import os ++import time + import iotests +-from iotests import qemu_img ++from iotests import qemu_img, QemuStorageDaemon + + image_len = 64 * 1024 * 1024 + +@@ -243,6 +244,74 @@ class TestBlockdevBackupAbort(iotests.QMPTestCase): + # Hangs on failure, we expect this error. + self.assert_qmp(result, 'error/class', 'GenericError') + ++# Test for RHBZ#2033626 ++class TestYieldingAndTimers(iotests.QMPTestCase): ++ sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ qsd = None ++ ++ def setUp(self): ++ self.create_nbd_export() ++ ++ # Simple VM with an NBD block device connected to the NBD export ++ # provided by the QSD ++ self.vm = iotests.VM() ++ self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' + ++ f'server.path={self.sock},export=exp,' + ++ 'reconnect-delay=1') ++ ++ self.vm.launch() ++ ++ def tearDown(self): ++ self.stop_nbd_export() ++ self.vm.shutdown() ++ ++ def test_timers_with_blockdev_del(self): ++ # Stop and restart the NBD server, and do some I/O on the client to ++ # trigger a reconnect and start the reconnect delay timer ++ self.stop_nbd_export() ++ self.create_nbd_export() ++ ++ result = self.vm.qmp('human-monitor-command', ++ command_line='qemu-io nbd "write 0 512"') ++ self.assert_qmp(result, 'return', '') ++ ++ # Reconnect is done, so the reconnect delay timer should be gone. ++ # (But there used to be a bug where it remained active, for which this ++ # is a regression test.) ++ ++ # Delete the BDS to see whether the timer is gone. 
If it is not, ++ # it will remain active, fire later, and then access freed data. ++ # (Or, with "block/nbd: Assert there are no timers when closed" ++ # applied, the assertion added in that patch will fail.) ++ result = self.vm.qmp('blockdev-del', node_name='nbd') ++ self.assert_qmp(result, 'return', {}) ++ ++ # Give the timer some time to fire (it has a timeout of 1 s). ++ # (Sleeping in an iotest may ring some alarm bells, but note that if ++ # the timing is off here, the test will just always pass. If we kill ++ # the VM too early, then we just kill the timer before it can fire, ++ # thus not see the error, and so the test will pass.) ++ time.sleep(2) ++ ++ def create_nbd_export(self): ++ assert self.qsd is None ++ ++ # Simple NBD export of a null-co BDS ++ self.qsd = QemuStorageDaemon( ++ '--blockdev', ++ 'null-co,node-name=null,read-zeroes=true', ++ ++ '--nbd-server', ++ f'addr.type=unix,addr.path={self.sock}', ++ ++ '--export', ++ 'nbd,id=exp,node-name=null,name=exp,writable=true' ++ ) ++ ++ def stop_nbd_export(self): ++ self.qsd.stop() ++ self.qsd = None ++ + if __name__ == '__main__': + iotests.main(supported_fmts=['qcow2'], + supported_protocols=['file']) +diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out +index 89968f35d7..914e3737bd 100644 +--- a/tests/qemu-iotests/281.out ++++ b/tests/qemu-iotests/281.out +@@ -1,5 +1,5 @@ +-.... ++..... 
+ ---------------------------------------------------------------------- +-Ran 4 tests ++Ran 5 tests + + OK +-- +2.27.0 + diff --git a/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch b/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch new file mode 100644 index 0000000..1caf73c --- /dev/null +++ b/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch @@ -0,0 +1,106 @@ +From ea4d8424fb2053b1cbb9538190b2b06351054125 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 3 Feb 2022 15:05:34 +0100 +Subject: [PATCH 3/5] iotests: Test blockdev-reopen with iothreads and + throttling + +RH-Author: Kevin Wolf +RH-MergeRequest: 142: block: Lock AioContext for drain_end in blockdev-reopen +RH-Commit: [2/2] 91d365864c391ca7db7db13260913fb61987b833 +RH-Bugzilla: 2067118 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Reitz + +The 'throttle' block driver implements .bdrv_co_drain_end, so +blockdev-reopen will have to wait for it to complete in the polling +loop at the end of qmp_blockdev_reopen(). This makes AIO_WAIT_WHILE() +release the AioContext lock, which causes a crash if the lock hasn't +correctly been taken. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20220203140534.36522-3-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit ee810602376125ca0e0afd6b7c715e13740978ea) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/245 | 36 +++++++++++++++++++++++++++++++++--- + tests/qemu-iotests/245.out | 4 ++-- + 2 files changed, 35 insertions(+), 5 deletions(-) + +diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 +index 24ac43f70e..8cbed7821b 100755 +--- a/tests/qemu-iotests/245 ++++ b/tests/qemu-iotests/245 +@@ -1138,12 +1138,13 @@ class TestBlockdevReopen(iotests.QMPTestCase): + self.assertEqual(self.get_node('hd1'), None) + self.assert_qmp(self.get_node('hd2'), 'ro', True) + +- def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None): +- opts = hd_opts(0) ++ def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None, ++ opts_a = None, opts_b = None): ++ opts = opts_a or hd_opts(0) + result = self.vm.qmp('blockdev-add', conv_keys = False, **opts) + self.assert_qmp(result, 'return', {}) + +- opts2 = hd_opts(2) ++ opts2 = opts_b or hd_opts(2) + result = self.vm.qmp('blockdev-add', conv_keys = False, **opts2) + self.assert_qmp(result, 'return', {}) + +@@ -1194,6 +1195,35 @@ class TestBlockdevReopen(iotests.QMPTestCase): + def test_iothreads_switch_overlay(self): + self.run_test_iothreads('', 'iothread0') + ++ def test_iothreads_with_throttling(self): ++ # Create a throttle-group object ++ opts = { 'qom-type': 'throttle-group', 'id': 'group0', ++ 'limits': { 'iops-total': 1000 } } ++ result = self.vm.qmp('object-add', conv_keys = False, **opts) ++ self.assert_qmp(result, 'return', {}) ++ ++ # Options with a throttle filter between format and protocol ++ opts = [ ++ { ++ 'driver': iotests.imgfmt, ++ 'node-name': f'hd{idx}', ++ 'file' : { ++ 'node-name': f'hd{idx}-throttle', ++ 'driver': 'throttle', ++ 'throttle-group': 'group0', ++ 'file': { ++ 'driver': 'file', ++ 'node-name': f'hd{idx}-file', ++ 
'filename': hd_path[idx], ++ }, ++ }, ++ } ++ for idx in (0, 2) ++ ] ++ ++ self.run_test_iothreads('iothread0', 'iothread0', None, ++ opts[0], opts[1]) ++ + if __name__ == '__main__': + iotests.activate_logging() + iotests.main(supported_fmts=["qcow2"], +diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out +index 4eced19294..a4e04a3266 100644 +--- a/tests/qemu-iotests/245.out ++++ b/tests/qemu-iotests/245.out +@@ -17,8 +17,8 @@ read 1/1 bytes at offset 262152 + read 1/1 bytes at offset 262160 + 1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +-............... ++................ + ---------------------------------------------------------------------- +-Ran 25 tests ++Ran 26 tests + + OK +-- +2.27.0 + diff --git a/kvm-iotests-block-status-cache-New-test.patch b/kvm-iotests-block-status-cache-New-test.patch new file mode 100644 index 0000000..25f057c --- /dev/null +++ b/kvm-iotests-block-status-cache-New-test.patch @@ -0,0 +1,197 @@ +From 0ba4c0836f702bb3abbd173c7ee486a8247331ae Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 18 Jan 2022 18:00:00 +0100 +Subject: [PATCH 7/7] iotests/block-status-cache: New test + +RH-Author: Hanna Reitz +RH-MergeRequest: 112: block/io: Update BSC only if want_zero is true +RH-Commit: [2/2] ba86b4db32c33e17a85f476d445ef0523cf8f60e +RH-Bugzilla: 2041480 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +Add a new test to verify that want_zero=false block-status calls do not +pollute the block-status cache for want_zero=true calls. + +We check want_zero=true calls and their results using `qemu-img map` +(over NBD), and want_zero=false calls also using `qemu-img map` over +NBD, but using the qemu:allocation-depth context. + +(This test case cannot be integrated into nbd-qemu-allocation, because +that is a qcow2 test, and this is a raw test.) 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220118170000.49423-3-hreitz@redhat.com> +Reviewed-by: Nir Soffer +Reviewed-by: Eric Blake +Tested-by: Eric Blake +Signed-off-by: Eric Blake +(cherry picked from commit 6384dd534d742123d26c008d9794b20bc41359d5) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/tests/block-status-cache | 139 ++++++++++++++++++ + .../qemu-iotests/tests/block-status-cache.out | 5 + + 2 files changed, 144 insertions(+) + create mode 100755 tests/qemu-iotests/tests/block-status-cache + create mode 100644 tests/qemu-iotests/tests/block-status-cache.out + +diff --git a/tests/qemu-iotests/tests/block-status-cache b/tests/qemu-iotests/tests/block-status-cache +new file mode 100755 +index 0000000000..6fa10bb8f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/block-status-cache +@@ -0,0 +1,139 @@ ++#!/usr/bin/env python3 ++# group: rw quick ++# ++# Test cases for the block-status cache. ++# ++# Copyright (C) 2022 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++# ++ ++import os ++import signal ++import iotests ++from iotests import qemu_img_create, qemu_img_pipe, qemu_nbd ++ ++ ++image_size = 1 * 1024 * 1024 ++test_img = os.path.join(iotests.test_dir, 'test.img') ++ ++nbd_pidfile = os.path.join(iotests.test_dir, 'nbd.pid') ++nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ ++ ++class TestBscWithNbd(iotests.QMPTestCase): ++ def setUp(self) -> None: ++ """Just create an empty image with a read-only NBD server on it""" ++ assert qemu_img_create('-f', iotests.imgfmt, test_img, ++ str(image_size)) == 0 ++ ++ # Pass --allocation-depth to enable the qemu:allocation-depth context, ++ # which we are going to query to provoke a block-status inquiry with ++ # want_zero=false. ++ assert qemu_nbd(f'--socket={nbd_sock}', ++ f'--format={iotests.imgfmt}', ++ '--persistent', ++ '--allocation-depth', ++ '--read-only', ++ f'--pid-file={nbd_pidfile}', ++ test_img) \ ++ == 0 ++ ++ def tearDown(self) -> None: ++ with open(nbd_pidfile, encoding='utf-8') as f: ++ pid = int(f.read()) ++ os.kill(pid, signal.SIGTERM) ++ os.remove(nbd_pidfile) ++ os.remove(test_img) ++ ++ def test_with_zero_bug(self) -> None: ++ """ ++ Verify that the block-status cache is not corrupted by a ++ want_zero=false call. ++ We can provoke a want_zero=false call with `qemu-img map` over NBD with ++ x-dirty-bitmap=qemu:allocation-depth, so we first run a normal `map` ++ (which results in want_zero=true), then using said ++ qemu:allocation-depth context, and finally another normal `map` to ++ verify that the cache has not been corrupted. ++ """ ++ ++ nbd_img_opts = f'driver=nbd,server.type=unix,server.path={nbd_sock}' ++ nbd_img_opts_alloc_depth = nbd_img_opts + \ ++ ',x-dirty-bitmap=qemu:allocation-depth' ++ ++ # Normal map, results in want_zero=true. ++ # This will probably detect an allocated data sector first (qemu likes ++ # to allocate the first sector to facilitate alignment probing), and ++ # then the rest to be zero. 
The BSC will thus contain (if anything) ++ # one range covering the first sector. ++ map_pre = qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts) ++ ++ # qemu:allocation-depth maps for want_zero=false. ++ # want_zero=false should (with the file driver, which the server is ++ # using) report everything as data. While this is sufficient for ++ # want_zero=false, this is nothing that should end up in the ++ # block-status cache. ++ # Due to a bug, this information did end up in the cache, though, and ++ # this would lead to wrong information being returned on subsequent ++ # want_zero=true calls. ++ # ++ # We need to run this map twice: On the first call, we probably still ++ # have the first sector in the cache, and so this will be served from ++ # the cache; and only the subsequent range will be queried from the ++ # block driver. This subsequent range will then be entered into the ++ # cache. ++ # If we did a want_zero=true call at this point, we would thus get ++ # correct information: The first sector is not covered by the cache, so ++ # we would get fresh block-status information from the driver, which ++ # would return a data range, and this would then go into the cache, ++ # evicting the wrong range from the want_zero=false call before. ++ # ++ # Therefore, we need a second want_zero=false map to reproduce: ++ # Since the first sector is not in the cache, the query for its status ++ # will go to the driver, which will return a result that reports the ++ # whole image to be a single data area. This result will then go into ++ # the cache, and so the cache will then report the whole image to ++ # contain data. ++ # ++ # Note that once the cache reports the whole image to contain data, any ++ # subsequent map operation will be served from the cache, and so we can ++ # never loop too many times here. 
++ for _ in range(2): ++ # (Ignore the result, this is just to contaminate the cache) ++ qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts_alloc_depth) ++ ++ # Now let's see whether the cache reports everything as data, or ++ # whether we get correct information (i.e. the same as we got on our ++ # first attempt). ++ map_post = qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts) ++ ++ if map_pre != map_post: ++ print('ERROR: Map information differs before and after querying ' + ++ 'qemu:allocation-depth') ++ print('Before:') ++ print(map_pre) ++ print('After:') ++ print(map_post) ++ ++ self.fail("Map information differs") ++ ++ ++if __name__ == '__main__': ++ # The block-status cache only works on the protocol layer, so to test it, ++ # we can only use the raw format ++ iotests.main(supported_fmts=['raw'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/tests/block-status-cache.out b/tests/qemu-iotests/tests/block-status-cache.out +new file mode 100644 +index 0000000000..ae1213e6f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/block-status-cache.out +@@ -0,0 +1,5 @@ ++. 
++---------------------------------------------------------------------- ++Ran 1 tests ++ ++OK +-- +2.27.0 + diff --git a/kvm-iotests-stream-error-on-reset-New-test.patch b/kvm-iotests-stream-error-on-reset-New-test.patch new file mode 100644 index 0000000..0214854 --- /dev/null +++ b/kvm-iotests-stream-error-on-reset-New-test.patch @@ -0,0 +1,198 @@ +From ffdec41922a34b6fe4e7e11f259553d65b41563e Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 11 Jan 2022 15:36:13 +0000 +Subject: [PATCH 4/6] iotests/stream-error-on-reset: New test + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 109: block-backend: prevent dangling BDS pointers across aio_poll() +RH-Commit: [2/2] 0ecb7010d9c121398e7ee22ee47dd85d89bcd941 +RH-Bugzilla: 2021778 2036178 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Kevin Wolf + +Author: Hanna Reitz + +Test the following scenario: +- Simple stream block in two-layer backing chain (base and top) +- The job is drained via blk_drain(), then an error occurs while the job + settles the ongoing request +- And so the job completes while in blk_drain() + +This was reported as a segfault, but is fixed by "block-backend: prevent +dangling BDS pointers across aio_poll()". 
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178 +Signed-off-by: Hanna Reitz +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220111153613.25453-3-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 2ca1d5d6b91f8a52a5c651f660b2f58c94bf97ba) +Signed-off-by: Stefan Hajnoczi +--- + .../qemu-iotests/tests/stream-error-on-reset | 140 ++++++++++++++++++ + .../tests/stream-error-on-reset.out | 5 + + 2 files changed, 145 insertions(+) + create mode 100755 tests/qemu-iotests/tests/stream-error-on-reset + create mode 100644 tests/qemu-iotests/tests/stream-error-on-reset.out + +diff --git a/tests/qemu-iotests/tests/stream-error-on-reset b/tests/qemu-iotests/tests/stream-error-on-reset +new file mode 100755 +index 0000000000..7eaedb24d7 +--- /dev/null ++++ b/tests/qemu-iotests/tests/stream-error-on-reset +@@ -0,0 +1,140 @@ ++#!/usr/bin/env python3 ++# group: rw quick ++# ++# Test what happens when a stream job completes in a blk_drain(). ++# ++# Copyright (C) 2022 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++import os ++import iotests ++from iotests import imgfmt, qemu_img_create, qemu_io_silent, QMPTestCase ++ ++ ++image_size = 1 * 1024 * 1024 ++data_size = 64 * 1024 ++base = os.path.join(iotests.test_dir, 'base.img') ++top = os.path.join(iotests.test_dir, 'top.img') ++ ++ ++# We want to test completing a stream job in a blk_drain(). 
++# ++# The blk_drain() we are going to use is a virtio-scsi device resetting, ++# which we can trigger by resetting the system. ++# ++# In order to have the block job complete on drain, we (1) throttle its ++# base image so we can start the drain after it has begun, but before it ++# completes, and (2) make it encounter an I/O error on the ensuing write. ++# (If it completes regularly, the completion happens after the drain for ++# some reason.) ++ ++class TestStreamErrorOnReset(QMPTestCase): ++ def setUp(self) -> None: ++ """ ++ Create two images: ++ - base image {base} with {data_size} bytes allocated ++ - top image {top} without any data allocated ++ ++ And the following VM configuration: ++ - base image throttled to {data_size} ++ - top image with a blkdebug configuration so the first write access ++ to it will result in an error ++ - top image is attached to a virtio-scsi device ++ """ ++ assert qemu_img_create('-f', imgfmt, base, str(image_size)) == 0 ++ assert qemu_io_silent('-c', f'write 0 {data_size}', base) == 0 ++ assert qemu_img_create('-f', imgfmt, top, str(image_size)) == 0 ++ ++ self.vm = iotests.VM() ++ self.vm.add_args('-accel', 'tcg') # Make throttling work properly ++ self.vm.add_object(self.vm.qmp_to_opts({ ++ 'qom-type': 'throttle-group', ++ 'id': 'thrgr', ++ 'x-bps-total': str(data_size) ++ })) ++ self.vm.add_blockdev(self.vm.qmp_to_opts({ ++ 'driver': imgfmt, ++ 'node-name': 'base', ++ 'file': { ++ 'driver': 'throttle', ++ 'throttle-group': 'thrgr', ++ 'file': { ++ 'driver': 'file', ++ 'filename': base ++ } ++ } ++ })) ++ self.vm.add_blockdev(self.vm.qmp_to_opts({ ++ 'driver': imgfmt, ++ 'node-name': 'top', ++ 'file': { ++ 'driver': 'blkdebug', ++ 'node-name': 'top-blkdebug', ++ 'inject-error': [{ ++ 'event': 'pwritev', ++ 'immediately': 'true', ++ 'once': 'true' ++ }], ++ 'image': { ++ 'driver': 'file', ++ 'filename': top ++ } ++ }, ++ 'backing': 'base' ++ })) ++ self.vm.add_device(self.vm.qmp_to_opts({ ++ 'driver': 'virtio-scsi', ++ 'id': 
'vscsi' ++ })) ++ self.vm.add_device(self.vm.qmp_to_opts({ ++ 'driver': 'scsi-hd', ++ 'bus': 'vscsi.0', ++ 'drive': 'top' ++ })) ++ self.vm.launch() ++ ++ def tearDown(self) -> None: ++ self.vm.shutdown() ++ os.remove(top) ++ os.remove(base) ++ ++ def test_stream_error_on_reset(self) -> None: ++ # Launch a stream job, which will take at least a second to ++ # complete, because the base image is throttled (so we can ++ # get in between it having started and it having completed) ++ res = self.vm.qmp('block-stream', job_id='stream', device='top') ++ self.assert_qmp(res, 'return', {}) ++ ++ while True: ++ ev = self.vm.event_wait('JOB_STATUS_CHANGE') ++ if ev['data']['status'] == 'running': ++ # Once the stream job is running, reset the system, which ++ # forces the virtio-scsi device to be reset, thus draining ++ # the stream job, and making it complete. Completing ++ # inside of that drain should not result in a segfault. ++ res = self.vm.qmp('system_reset') ++ self.assert_qmp(res, 'return', {}) ++ elif ev['data']['status'] == 'null': ++ # The test is done once the job is gone ++ break ++ ++ ++if __name__ == '__main__': ++ # Passes with any format with backing file support, but qed and ++ # qcow1 do not seem to exercise the used-to-be problematic code ++ # path, so there is no point in having them in this list ++ iotests.main(supported_fmts=['qcow2', 'vmdk'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/tests/stream-error-on-reset.out b/tests/qemu-iotests/tests/stream-error-on-reset.out +new file mode 100644 +index 0000000000..ae1213e6f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/stream-error-on-reset.out +@@ -0,0 +1,5 @@ ++. 
++---------------------------------------------------------------------- ++Ran 1 tests ++ ++OK +-- +2.27.0 + diff --git a/kvm-iotests.py-Add-QemuStorageDaemon-class.patch b/kvm-iotests.py-Add-QemuStorageDaemon-class.patch new file mode 100644 index 0000000..539897f --- /dev/null +++ b/kvm-iotests.py-Add-QemuStorageDaemon-class.patch @@ -0,0 +1,92 @@ +From 34ffcd1a463bd3c1d36ed2f33dd6335b35b38460 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:09 +0100 +Subject: [PATCH 3/6] iotests.py: Add QemuStorageDaemon class + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [3/6] 754fe76bc5e8be57f4b78f176531014c4a12b044 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +This is a rather simple class that allows creating a QSD instance +running in the background and stopping it when no longer needed. + +The __del__ handler is a safety net for when something goes so wrong in +a test that e.g. the tearDown() method is not called (e.g. setUp() +launches the QSD, but then launching a VM fails). We do not want the +QSD to continue running after the test has failed, so __del__() will +take care to kill it. 
+ +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 091dc7b2b5553a529bff9a7bf9ad3bc85bc5bdcd) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/iotests.py | 40 +++++++++++++++++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 83bfedb902..a51b5ce8cd 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -72,6 +72,8 @@ + qemu_prog = os.environ.get('QEMU_PROG', 'qemu') + qemu_opts = os.environ.get('QEMU_OPTIONS', '').strip().split(' ') + ++qsd_prog = os.environ.get('QSD_PROG', 'qemu-storage-daemon') ++ + gdb_qemu_env = os.environ.get('GDB_OPTIONS') + qemu_gdb = [] + if gdb_qemu_env: +@@ -312,6 +314,44 @@ def cmd(self, cmd): + return self._read_output() + + ++class QemuStorageDaemon: ++ def __init__(self, *args: str, instance_id: str = 'a'): ++ assert '--pidfile' not in args ++ self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid') ++ all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile] ++ ++ # Cannot use with here, we want the subprocess to stay around ++ # pylint: disable=consider-using-with ++ self._p = subprocess.Popen(all_args) ++ while not os.path.exists(self.pidfile): ++ if self._p.poll() is not None: ++ cmd = ' '.join(all_args) ++ raise RuntimeError( ++ 'qemu-storage-daemon terminated with exit code ' + ++ f'{self._p.returncode}: {cmd}') ++ ++ time.sleep(0.01) ++ ++ with open(self.pidfile, encoding='utf-8') as f: ++ self._pid = int(f.read().strip()) ++ ++ assert self._pid == self._p.pid ++ ++ def stop(self, kill_signal=15): ++ self._p.send_signal(kill_signal) ++ self._p.wait() ++ self._p = None ++ ++ try: ++ os.remove(self.pidfile) ++ except OSError: ++ pass ++ ++ def __del__(self): ++ if self._p is not None: ++ self.stop(kill_signal=9) ++ ++ + def qemu_nbd(*args): + '''Run qemu-nbd in daemon mode and return the parent's exit 
code''' + return subprocess.call(qemu_nbd_args + ['--fork'] + list(args)) +-- +2.27.0 + diff --git a/kvm-kvm-don-t-use-perror-without-useful-errno.patch b/kvm-kvm-don-t-use-perror-without-useful-errno.patch new file mode 100644 index 0000000..a78c089 --- /dev/null +++ b/kvm-kvm-don-t-use-perror-without-useful-errno.patch @@ -0,0 +1,62 @@ +From 9ddefaedf423ec03eadaf17496c14e0d7b2381c8 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Thu, 28 Jul 2022 16:24:46 +0200 +Subject: [PATCH 30/32] kvm: don't use perror() without useful errno + +RH-Author: Cornelia Huck +RH-MergeRequest: 110: kvm: don't use perror() without useful errno +RH-Commit: [1/1] 20e51aac6767c1f89f74c7d692d1fb7689eff5f0 (cohuck/qemu-kvm-c9s) +RH-Bugzilla: 2095608 +RH-Acked-by: Eric Auger +RH-Acked-by: Thomas Huth +RH-Acked-by: Gavin Shan + +perror() is designed to append the decoded errno value to a +string. This, however, only makes sense if we called something that +actually sets errno prior to that. + +For the callers that check for split irqchip support that is not the +case, and we end up with confusing error messages that end in +"success". Use error_report() instead. 
+ +Signed-off-by: Cornelia Huck +Message-Id: <20220728142446.438177-1-cohuck@redhat.com> +Signed-off-by: Paolo Bonzini + +https://bugzilla.redhat.com/show_bug.cgi?id=2095608 +(cherry picked from commit 47c182fe8b03c0c40059fb95840923e65c9bdb4f) +Signed-off-by: Cornelia Huck +--- + accel/kvm/kvm-all.c | 2 +- + target/arm/kvm.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 5f1377ca04..e9c7947640 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2254,7 +2254,7 @@ static void kvm_irqchip_create(KVMState *s) + ret = kvm_arch_irqchip_create(s); + if (ret == 0) { + if (s->kernel_irqchip_split == ON_OFF_AUTO_ON) { +- perror("Split IRQ chip mode not supported."); ++ error_report("Split IRQ chip mode not supported."); + exit(1); + } else { + ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP); +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index bbf1ce7ba3..0a2ba1f8e3 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -960,7 +960,7 @@ void kvm_arch_init_irq_routing(KVMState *s) + int kvm_arch_irqchip_create(KVMState *s) + { + if (kvm_kernel_irqchip_split()) { +- perror("-machine kernel_irqchip=split is not supported on ARM."); ++ error_report("-machine kernel_irqchip=split is not supported on ARM."); + exit(1); + } + +-- +2.31.1 + diff --git a/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch b/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch new file mode 100644 index 0000000..f12b8ec --- /dev/null +++ b/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch @@ -0,0 +1,49 @@ +From 49d9c9dced7278517105e9cfec34ea4af716432d Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 9 Jun 2022 17:47:12 +0100 +Subject: [PATCH 6/6] linux-aio: explain why max batch is checked in + laio_io_unplug() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 102: linux-aio: fix unbalanced plugged counter in laio_io_unplug() +RH-Commit: [2/2] 
b3d6421086bde50d4baad2343b2df89c5f66950e (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2092788 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +It may not be obvious why laio_io_unplug() checks max batch. I discussed +this with Stefano and have added a comment summarizing the reason. + +Cc: Stefano Garzarella +Cc: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Message-id: 20220609164712.1539045-3-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 99b969fbe105117f5af6060d3afef40ca39cc9c1) +Signed-off-by: Stefan Hajnoczi +--- + block/linux-aio.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/block/linux-aio.c b/block/linux-aio.c +index 6078da7e42..9c2393a2f7 100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -365,6 +365,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + assert(s->io_q.plugged); + s->io_q.plugged--; + ++ /* ++ * Why max batch checking is performed here: ++ * Another BDS may have queued requests with a higher dev_max_batch and ++ * therefore in_queue could now exceed our dev_max_batch. Re-check the max ++ * batch so we can honor our device's dev_max_batch. 
++ */ + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || + (!s->io_q.plugged && + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { +-- +2.31.1 + diff --git a/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch b/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch new file mode 100644 index 0000000..ed9b5ee --- /dev/null +++ b/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch @@ -0,0 +1,56 @@ +From e7326c3a7e0fc022aa5c0ae07bc1e19ad1b6f2ed Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 9 Jun 2022 17:47:11 +0100 +Subject: [PATCH 5/6] linux-aio: fix unbalanced plugged counter in + laio_io_unplug() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 102: linux-aio: fix unbalanced plugged counter in laio_io_unplug() +RH-Commit: [1/2] 8a71da371c72521f1d70b8767ee564575e0d522b (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2092788 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +Every laio_io_plug() call has a matching laio_io_unplug() call. There is +a plugged counter that tracks the number of levels of plugging and +allows for nesting. + +The plugged counter must reflect the balance between laio_io_plug() and +laio_io_unplug() calls accurately. Otherwise I/O stalls occur since +io_submit(2) calls are skipped while plugged. + +Reported-by: Nikolay Tenev +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Message-id: 20220609164712.1539045-2-stefanha@redhat.com +Cc: Stefano Garzarella +Fixes: 68d7946648 ("linux-aio: add `dev_max_batch` parameter to laio_io_unplug()") +[Stefano Garzarella suggested adding a Fixes tag. 
+--Stefan] +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit f387cac5af030a58ac5a0dacf64cab5e5a4fe5c7) +Signed-off-by: Stefan Hajnoczi +--- + block/linux-aio.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/block/linux-aio.c b/block/linux-aio.c +index 4c423fcccf..6078da7e42 100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -363,8 +363,10 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + uint64_t dev_max_batch) + { + assert(s->io_q.plugged); ++ s->io_q.plugged--; ++ + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || +- (--s->io_q.plugged == 0 && ++ (!s->io_q.plugged && + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { + ioq_submit(s); + } +-- +2.31.1 + diff --git a/kvm-meson-create-have_vhost_-variables.patch b/kvm-meson-create-have_vhost_-variables.patch new file mode 100644 index 0000000..fcae620 --- /dev/null +++ b/kvm-meson-create-have_vhost_-variables.patch @@ -0,0 +1,154 @@ +From 51c310097832724bafac26aed81399da40128400 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:50:43 +0200 +Subject: [PATCH 05/32] meson: create have_vhost_* variables +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [5/27] 3b30f89e6d639923dc9d9a92a4261bb4509e5c83 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 2a3129a37652e5e81d12f6e16dd3c447f09831f9 +Author: Paolo Bonzini +Date: Wed Apr 20 17:34:05 2022 +0200 + + meson: create have_vhost_* variables + + When using Meson options rather than config-host.h, the "when" clauses + have to be changed to if statements (which is not necessarily great, + though at least it highlights which parts 
of the build are per-target + and which are not). + + Do that before moving vhost logic to meson.build, though for now + the variables are just based on config-host.mak data. + + Reviewed-by: Marc-André Lureau + Signed-off-by: Paolo Bonzini + +Signed-off-by: Eugenio Pérez +--- + meson.build | 30 ++++++++++++++++++++---------- + tests/meson.build | 2 +- + tools/meson.build | 2 +- + 3 files changed, 22 insertions(+), 12 deletions(-) + +diff --git a/meson.build b/meson.build +index 13e3323380..735f538497 100644 +--- a/meson.build ++++ b/meson.build +@@ -298,6 +298,15 @@ have_tpm = get_option('tpm') \ + .require(targetos != 'windows', error_message: 'TPM emulation only available on POSIX systems') \ + .allowed() + ++# vhost ++have_vhost_user = 'CONFIG_VHOST_USER' in config_host ++have_vhost_vdpa = 'CONFIG_VHOST_VDPA' in config_host ++have_vhost_kernel = 'CONFIG_VHOST_KERNEL' in config_host ++have_vhost_net_user = 'CONFIG_VHOST_NET_USER' in config_host ++have_vhost_net_vdpa = 'CONFIG_VHOST_NET_VDPA' in config_host ++have_vhost_net = 'CONFIG_VHOST_NET' in config_host ++have_vhost_user_crypto = 'CONFIG_VHOST_CRYPTO' in config_host ++ + # Target-specific libraries and flags + libm = cc.find_library('m', required: false) + threads = dependency('threads') +@@ -1335,7 +1344,7 @@ has_statx_mnt_id = cc.links(statx_mnt_id_test) + have_vhost_user_blk_server = get_option('vhost_user_blk_server') \ + .require(targetos == 'linux', + error_message: 'vhost_user_blk_server requires linux') \ +- .require('CONFIG_VHOST_USER' in config_host, ++ .require(have_vhost_user, + error_message: 'vhost_user_blk_server requires vhost-user support') \ + .disable_auto_if(not have_system) \ + .allowed() +@@ -2116,9 +2125,9 @@ host_kconfig = \ + (have_ivshmem ? ['CONFIG_IVSHMEM=y'] : []) + \ + ('CONFIG_OPENGL' in config_host ? ['CONFIG_OPENGL=y'] : []) + \ + (x11.found() ? ['CONFIG_X11=y'] : []) + \ +- ('CONFIG_VHOST_USER' in config_host ? 
['CONFIG_VHOST_USER=y'] : []) + \ +- ('CONFIG_VHOST_VDPA' in config_host ? ['CONFIG_VHOST_VDPA=y'] : []) + \ +- ('CONFIG_VHOST_KERNEL' in config_host ? ['CONFIG_VHOST_KERNEL=y'] : []) + \ ++ (have_vhost_user ? ['CONFIG_VHOST_USER=y'] : []) + \ ++ (have_vhost_vdpa ? ['CONFIG_VHOST_VDPA=y'] : []) + \ ++ (have_vhost_kernel ? ['CONFIG_VHOST_KERNEL=y'] : []) + \ + (have_virtfs ? ['CONFIG_VIRTFS=y'] : []) + \ + ('CONFIG_LINUX' in config_host ? ['CONFIG_LINUX=y'] : []) + \ + ('CONFIG_PVRDMA' in config_host ? ['CONFIG_PVRDMA=y'] : []) + \ +@@ -2799,7 +2808,7 @@ if have_system or have_user + endif + + vhost_user = not_found +-if targetos == 'linux' and 'CONFIG_VHOST_USER' in config_host ++if targetos == 'linux' and have_vhost_user + libvhost_user = subproject('libvhost-user') + vhost_user = libvhost_user.get_variable('vhost_user_dep') + endif +@@ -3386,7 +3395,7 @@ if have_tools + dependencies: qemuutil, + install: true) + +- if 'CONFIG_VHOST_USER' in config_host ++ if have_vhost_user + subdir('contrib/vhost-user-blk') + subdir('contrib/vhost-user-gpu') + subdir('contrib/vhost-user-input') +@@ -3516,15 +3525,16 @@ if 'simple' in get_option('trace_backends') + endif + summary_info += {'D-Bus display': dbus_display} + summary_info += {'QOM debugging': get_option('qom_cast_debug')} +-summary_info += {'vhost-kernel support': config_host.has_key('CONFIG_VHOST_KERNEL')} +-summary_info += {'vhost-net support': config_host.has_key('CONFIG_VHOST_NET')} +-summary_info += {'vhost-crypto support': config_host.has_key('CONFIG_VHOST_CRYPTO')} ++summary_info += {'vhost-kernel support': have_vhost_kernel} ++summary_info += {'vhost-net support': have_vhost_net} ++summary_info += {'vhost-user support': have_vhost_user} ++summary_info += {'vhost-user-crypto support': have_vhost_user_crypto} + summary_info += {'vhost-scsi support': config_host.has_key('CONFIG_VHOST_SCSI')} + summary_info += {'vhost-vsock support': config_host.has_key('CONFIG_VHOST_VSOCK')} +-summary_info += {'vhost-user 
support': config_host.has_key('CONFIG_VHOST_USER')} + summary_info += {'vhost-user-blk server support': have_vhost_user_blk_server} + summary_info += {'vhost-user-fs support': config_host.has_key('CONFIG_VHOST_USER_FS')} + summary_info += {'vhost-vdpa support': config_host.has_key('CONFIG_VHOST_VDPA')} ++summary_info += {'vhost-vdpa support': have_vhost_vdpa} + summary_info += {'build guest agent': have_ga} + summary(summary_info, bool_yn: true, section: 'Configurable features') + +diff --git a/tests/meson.build b/tests/meson.build +index 1d05109eb4..bbe41c8559 100644 +--- a/tests/meson.build ++++ b/tests/meson.build +@@ -70,7 +70,7 @@ test_deps = { + 'test-qht-par': qht_bench, + } + +-if have_tools and 'CONFIG_VHOST_USER' in config_host and 'CONFIG_LINUX' in config_host ++if have_tools and have_vhost_user and 'CONFIG_LINUX' in config_host + executable('vhost-user-bridge', + sources: files('vhost-user-bridge.c'), + dependencies: [qemuutil, vhost_user]) +diff --git a/tools/meson.build b/tools/meson.build +index 46977af84f..10eb3a043f 100644 +--- a/tools/meson.build ++++ b/tools/meson.build +@@ -3,7 +3,7 @@ have_virtiofsd = get_option('virtiofsd') \ + error_message: 'virtiofsd requires Linux') \ + .require(seccomp.found() and libcap_ng.found(), + error_message: 'virtiofsd requires libcap-ng-devel and seccomp-devel') \ +- .require('CONFIG_VHOST_USER' in config_host, ++ .require(have_vhost_user, + error_message: 'virtiofsd needs vhost-user-support') \ + .disable_auto_if(not have_tools and not have_system) \ + .allowed() +-- +2.31.1 + diff --git a/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch b/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch new file mode 100644 index 0000000..99d86c1 --- /dev/null +++ b/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch @@ -0,0 +1,213 @@ +From a7d57a09e33275d5e6649273b5c9da1bc3c92491 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:51:53 +0200 +Subject: [PATCH 
06/32] meson: use have_vhost_* variables to pick sources +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [6/27] bc3db1efb759c0bc97fde2f4fbb3d6dc404c8d3d (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 43b6d7ee1fbc5b5fb7c85d8131fdac1863214ad6 +Author: Paolo Bonzini +Date: Wed Apr 20 17:34:06 2022 +0200 + + meson: use have_vhost_* variables to pick sources + + Reviewed-by: Marc-André Lureau + Signed-off-by: Paolo Bonzini + +Signed-off-by: Eugenio Pérez +--- + Kconfig.host | 3 --- + backends/meson.build | 8 ++++++-- + hw/net/meson.build | 8 ++++++-- + hw/virtio/Kconfig | 3 --- + hw/virtio/meson.build | 25 ++++++++++++++++--------- + meson.build | 1 + + net/meson.build | 12 +++++++----- + tests/qtest/meson.build | 4 +++- + 8 files changed, 39 insertions(+), 25 deletions(-) + +diff --git a/Kconfig.host b/Kconfig.host +index 60b9c07b5e..1165c4eacd 100644 +--- a/Kconfig.host ++++ b/Kconfig.host +@@ -22,15 +22,12 @@ config TPM + + config VHOST_USER + bool +- select VHOST + + config VHOST_VDPA + bool +- select VHOST + + config VHOST_KERNEL + bool +- select VHOST + + config VIRTFS + bool +diff --git a/backends/meson.build b/backends/meson.build +index 6e68945528..cb92f639ca 100644 +--- a/backends/meson.build ++++ b/backends/meson.build +@@ -12,9 +12,13 @@ softmmu_ss.add([files( + softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('rng-random.c')) + softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('hostmem-file.c')) + softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('hostmem-memfd.c')) +-softmmu_ss.add(when: ['CONFIG_VHOST_USER', 'CONFIG_VIRTIO'], if_true: files('vhost-user.c')) ++if have_vhost_user ++ softmmu_ss.add(when: 'CONFIG_VIRTIO', 
if_true: files('vhost-user.c')) ++endif + softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) +-softmmu_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VHOST_CRYPTO'], if_true: files('cryptodev-vhost-user.c')) ++if have_vhost_user_crypto ++ softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c')) ++endif + softmmu_ss.add(when: 'CONFIG_GIO', if_true: [files('dbus-vmstate.c'), gio]) + softmmu_ss.add(when: 'CONFIG_SGX', if_true: files('hostmem-epc.c')) + +diff --git a/hw/net/meson.build b/hw/net/meson.build +index 685b75badb..ebac261542 100644 +--- a/hw/net/meson.build ++++ b/hw/net/meson.build +@@ -46,8 +46,12 @@ specific_ss.add(when: 'CONFIG_XILINX_ETHLITE', if_true: files('xilinx_ethlite.c' + softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('net_rx_pkt.c')) + specific_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('virtio-net.c')) + +-softmmu_ss.add(when: ['CONFIG_VIRTIO_NET', 'CONFIG_VHOST_NET'], if_true: files('vhost_net.c'), if_false: files('vhost_net-stub.c')) +-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost_net-stub.c')) ++if have_vhost_net ++ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost_net.c'), if_false: files('vhost_net-stub.c')) ++ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost_net-stub.c')) ++else ++ softmmu_ss.add(files('vhost_net-stub.c')) ++endif + + softmmu_ss.add(when: 'CONFIG_ETSEC', if_true: files( + 'fsl_etsec/etsec.c', +diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig +index c144d42f9b..8ca7b3d9d6 100644 +--- a/hw/virtio/Kconfig ++++ b/hw/virtio/Kconfig +@@ -1,6 +1,3 @@ +-config VHOST +- bool +- + config VIRTIO + bool + +diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build +index 67dc77e00f..30a832eb4a 100644 +--- a/hw/virtio/meson.build ++++ b/hw/virtio/meson.build +@@ -2,18 +2,22 @@ softmmu_virtio_ss = ss.source_set() + softmmu_virtio_ss.add(files('virtio-bus.c')) + softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: 
files('virtio-pci.c')) + softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) +-softmmu_virtio_ss.add(when: 'CONFIG_VHOST', if_false: files('vhost-stub.c')) +- +-softmmu_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss) +-softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) +- +-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) + + virtio_ss = ss.source_set() + virtio_ss.add(files('virtio.c')) +-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c')) +-virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c')) +-virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-shadow-virtqueue.c', 'vhost-vdpa.c')) ++ ++if have_vhost ++ virtio_ss.add(files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c')) ++ if have_vhost_user ++ virtio_ss.add(files('vhost-user.c')) ++ endif ++ if have_vhost_vdpa ++ virtio_ss.add(files('vhost-vdpa.c', 'vhost-shadow-virtqueue.c')) ++ endif ++else ++ softmmu_virtio_ss.add(files('vhost-stub.c')) ++endif ++ + virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) + virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) + virtio_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VIRTIO_PCI'], if_true: files('virtio-crypto-pci.c')) +@@ -53,3 +57,6 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) + virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) + + specific_ss.add_all(when: 'CONFIG_VIRTIO', if_true: virtio_ss) ++softmmu_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss) ++softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) ++softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) +diff --git a/meson.build b/meson.build +index 735f538497..9ba675f098 100644 +--- a/meson.build ++++ b/meson.build +@@ -305,6 +305,7 @@ have_vhost_kernel = 'CONFIG_VHOST_KERNEL' in 
config_host + have_vhost_net_user = 'CONFIG_VHOST_NET_USER' in config_host + have_vhost_net_vdpa = 'CONFIG_VHOST_NET_VDPA' in config_host + have_vhost_net = 'CONFIG_VHOST_NET' in config_host ++have_vhost = have_vhost_user or have_vhost_vdpa or have_vhost_kernel + have_vhost_user_crypto = 'CONFIG_VHOST_CRYPTO' in config_host + + # Target-specific libraries and flags +diff --git a/net/meson.build b/net/meson.build +index 847bc2ac85..c965e83b26 100644 +--- a/net/meson.build ++++ b/net/meson.build +@@ -26,10 +26,10 @@ softmmu_ss.add(when: vde, if_true: files('vde.c')) + if have_netmap + softmmu_ss.add(files('netmap.c')) + endif +-vhost_user_ss = ss.source_set() +-vhost_user_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c')) +-softmmu_ss.add_all(when: 'CONFIG_VHOST_NET_USER', if_true: vhost_user_ss) +-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c')) ++if have_vhost_net_user ++ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c')) ++ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c')) ++endif + + softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('tap-linux.c')) + softmmu_ss.add(when: 'CONFIG_BSD', if_true: files('tap-bsd.c')) +@@ -40,6 +40,8 @@ if not config_host.has_key('CONFIG_LINUX') and not config_host.has_key('CONFIG_B + endif + softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix)) + softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c')) +-softmmu_ss.add(when: 'CONFIG_VHOST_NET_VDPA', if_true: files('vhost-vdpa.c')) ++if have_vhost_net_vdpa ++ softmmu_ss.add(files('vhost-vdpa.c')) ++endif + + subdir('can') +diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build +index 67cd32def1..9f550df900 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -269,7 +269,9 @@ qos_test_ss.add( + if have_virtfs + qos_test_ss.add(files('virtio-9p-test.c')) + endif 
+-qos_test_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user-test.c')) ++if have_vhost_user ++ qos_test_ss.add(files('vhost-user-test.c')) ++endif + if have_tools and have_vhost_user_blk_server + qos_test_ss.add(files('vhost-user-blk-test.c')) + endif +-- +2.31.1 + diff --git a/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch b/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch new file mode 100644 index 0000000..0da63bf --- /dev/null +++ b/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch @@ -0,0 +1,87 @@ +From 7c489b54b0bb33445113fbf16e88feb23be68013 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:30 -0300 +Subject: [PATCH 07/18] meson.build: Fix docker-test-build@alpine when + including linux/errqueue.h +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [1/11] f058eb846fcf611d527a1dd3b0cc399cdc17e3ee (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +A build error happens in alpine CI when linux/errqueue.h is included +in io/channel-socket.c, due to redefining of 'struct __kernel_timespec': + +=== +ninja: job failed: [...] 
+In file included from /usr/include/linux/errqueue.h:6, + from ../io/channel-socket.c:29: +/usr/include/linux/time_types.h:7:8: error: redefinition of 'struct __kernel_timespec' + 7 | struct __kernel_timespec { + | ^~~~~~~~~~~~~~~~~ +In file included from /usr/include/liburing.h:19, + from /builds/user/qemu/include/block/aio.h:18, + from /builds/user/qemu/include/io/channel.h:26, + from /builds/user/qemu/include/io/channel-socket.h:24, + from ../io/channel-socket.c:24: +/usr/include/liburing/compat.h:9:8: note: originally defined here + 9 | struct __kernel_timespec { + | ^~~~~~~~~~~~~~~~~ +ninja: subcommand failed +=== + +As above error message suggests, 'struct __kernel_timespec' was already +defined by liburing/compat.h. + +Fix alpine CI by adding test to disable liburing in configure step if a +redefinition happens between linux/errqueue.h and liburing/compat.h. + +[dgilbert: This has been fixed in Alpine issue 13813 and liburing] + +Signed-off-by: Leonardo Bras +Message-Id: <20220513062836.965425-2-leobras@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 354081d43de44ebd3497fe08f7f0121a5517d528) +Signed-off-by: Leonardo Bras +--- + meson.build | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/meson.build b/meson.build +index 5a7c10e639..13e3323380 100644 +--- a/meson.build ++++ b/meson.build +@@ -471,12 +471,23 @@ if not get_option('linux_aio').auto() or have_block + required: get_option('linux_aio'), + kwargs: static_kwargs) + endif ++ ++linux_io_uring_test = ''' ++ #include <liburing.h> ++ #include <linux/errqueue.h> ++ ++ int main(void) { return 0; }''' ++ + linux_io_uring = not_found + if not get_option('linux_io_uring').auto() or have_block + linux_io_uring = dependency('liburing', version: '>=0.3', + required: get_option('linux_io_uring'), + method: 'pkg-config', kwargs: static_kwargs) ++ if not cc.links(linux_io_uring_test) ++ linux_io_uring = not_found ++ endif + endif ++ + libnfs = not_found + if not get_option('libnfs').auto() or have_block + libnfs = dependency('libnfs', version: '>=1.9.3', +-- +2.35.3 + diff --git a/kvm-migration-Add-migrate_use_tls-helper.patch b/kvm-migration-Add-migrate_use_tls-helper.patch new file mode 100644 index 0000000..0fe0d91 --- /dev/null +++ b/kvm-migration-Add-migrate_use_tls-helper.patch @@ -0,0 +1,106 @@ +From 828f6c106eedcb7a48e551ffda15af56ff92a899 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:34 -0300 +Subject: [PATCH 11/18] migration: Add migrate_use_tls() helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [5/11] 06e945297c3b9c0ce5864885aafcdba1e5746bc2 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +A lot of places check parameters.tls_creds in order to evaluate if TLS is +in use, and sometimes call migrate_get_current() just for that test. 
+ +Add new helper function migrate_use_tls() in order to simplify testing +for TLS usage. + +Signed-off-by: Leonardo Bras +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-6-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit d2fafb6a6814a8998607d0baf691265032996a0f) +Signed-off-by: Leonardo Bras +--- + migration/channel.c | 3 +-- + migration/migration.c | 9 +++++++++ + migration/migration.h | 1 + + migration/multifd.c | 5 +---- + 4 files changed, 12 insertions(+), 6 deletions(-) + +diff --git a/migration/channel.c b/migration/channel.c +index c4fc000a1a..086b5c0d8b 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -38,8 +38,7 @@ void migration_channel_process_incoming(QIOChannel *ioc) + trace_migration_set_incoming_channel( + ioc, object_get_typename(OBJECT(ioc))); + +- if (s->parameters.tls_creds && +- *s->parameters.tls_creds && ++ if (migrate_use_tls() && + !object_dynamic_cast(OBJECT(ioc), + TYPE_QIO_CHANNEL_TLS)) { + migration_tls_channel_process_incoming(s, ioc, &local_err); +diff --git a/migration/migration.c b/migration/migration.c +index 0a6b3b9f4d..d91efb66fe 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2582,6 +2582,15 @@ bool migrate_use_zero_copy_send(void) + } + #endif + ++int migrate_use_tls(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.tls_creds && *s->parameters.tls_creds; ++} ++ + int migrate_use_xbzrle(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 5bcb7628ef..c2cabb8a14 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -381,6 +381,7 @@ bool migrate_use_zero_copy_send(void); + #else + #define migrate_use_zero_copy_send() (false) + #endif ++int migrate_use_tls(void); + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); + bool migrate_colo_enabled(void); +diff --git 
a/migration/multifd.c b/migration/multifd.c +index 76b57a7177..43998ad117 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -784,14 +784,11 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + QIOChannel *ioc, + Error *error) + { +- MigrationState *s = migrate_get_current(); +- + trace_multifd_set_outgoing_channel( + ioc, object_get_typename(OBJECT(ioc)), p->tls_hostname, error); + + if (!error) { +- if (s->parameters.tls_creds && +- *s->parameters.tls_creds && ++ if (migrate_use_tls() && + !object_dynamic_cast(OBJECT(ioc), + TYPE_QIO_CHANNEL_TLS)) { + multifd_tls_channel_connect(p, ioc, &error); +-- +2.35.3 + diff --git a/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch b/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch new file mode 100644 index 0000000..206ac3d --- /dev/null +++ b/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch @@ -0,0 +1,250 @@ +From d6500340dc3c1152b5efe04ef3daa50c17a55e30 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:33 -0300 +Subject: [PATCH 10/18] migration: Add zero-copy-send parameter for QMP/HMP for + Linux +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [4/11] 514d98d595992c53ff98de750035e080ded8972e (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Add property that allows zero-copy migration of memory pages +on the sending side, and also includes a helper function +migrate_use_zero_copy_send() to check if it's enabled. + +No code is introduced to actually do the migration, but it allow +future implementations to enable/disable this feature. + +On non-Linux builds this parameter is compiled-out. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. 
Berrangé +Reviewed-by: Juan Quintela +Acked-by: Markus Armbruster +Message-Id: <20220513062836.965425-5-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit abb6295b3ace5d17c3a65936913fc346616dbf14) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 32 ++++++++++++++++++++++++++++++++ + migration/migration.h | 5 +++++ + migration/socket.c | 11 +++++++++-- + monitor/hmp-cmds.c | 6 ++++++ + qapi/migration.json | 24 ++++++++++++++++++++++++ + 5 files changed, 76 insertions(+), 2 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 695f0f2900..0a6b3b9f4d 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -899,6 +899,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->multifd_zlib_level = s->parameters.multifd_zlib_level; + params->has_multifd_zstd_level = true; + params->multifd_zstd_level = s->parameters.multifd_zstd_level; ++#ifdef CONFIG_LINUX ++ params->has_zero_copy_send = true; ++ params->zero_copy_send = s->parameters.zero_copy_send; ++#endif + params->has_xbzrle_cache_size = true; + params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; + params->has_max_postcopy_bandwidth = true; +@@ -1555,6 +1559,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } ++#ifdef CONFIG_LINUX ++ if (params->has_zero_copy_send) { ++ dest->zero_copy_send = params->zero_copy_send; ++ } ++#endif + if (params->has_xbzrle_cache_size) { + dest->xbzrle_cache_size = params->xbzrle_cache_size; + } +@@ -1667,6 +1676,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } ++#ifdef CONFIG_LINUX ++ if (params->has_zero_copy_send) { ++ s->parameters.zero_copy_send = params->zero_copy_send; ++ } ++#endif + if 
(params->has_xbzrle_cache_size) { + s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; + xbzrle_cache_resize(params->xbzrle_cache_size, errp); +@@ -2557,6 +2571,17 @@ int migrate_multifd_zstd_level(void) + return s->parameters.multifd_zstd_level; + } + ++#ifdef CONFIG_LINUX ++bool migrate_use_zero_copy_send(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.zero_copy_send; ++} ++#endif ++ + int migrate_use_xbzrle(void) + { + MigrationState *s; +@@ -4200,6 +4225,10 @@ static Property migration_properties[] = { + DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, + parameters.multifd_zstd_level, + DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), ++#ifdef CONFIG_LINUX ++ DEFINE_PROP_BOOL("zero_copy_send", MigrationState, ++ parameters.zero_copy_send, false), ++#endif + DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, + parameters.xbzrle_cache_size, + DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), +@@ -4297,6 +4326,9 @@ static void migration_instance_init(Object *obj) + params->has_multifd_compression = true; + params->has_multifd_zlib_level = true; + params->has_multifd_zstd_level = true; ++#ifdef CONFIG_LINUX ++ params->has_zero_copy_send = true; ++#endif + params->has_xbzrle_cache_size = true; + params->has_max_postcopy_bandwidth = true; + params->has_max_cpu_throttle = true; +diff --git a/migration/migration.h b/migration/migration.h +index 2de861df01..5bcb7628ef 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -376,6 +376,11 @@ MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); + int migrate_multifd_zstd_level(void); + ++#ifdef CONFIG_LINUX ++bool migrate_use_zero_copy_send(void); ++#else ++#define migrate_use_zero_copy_send() (false) ++#endif + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); + bool migrate_colo_enabled(void); +diff --git a/migration/socket.c b/migration/socket.c +index 05705a32d8..3754d8f72c 100644 +--- 
a/migration/socket.c ++++ b/migration/socket.c +@@ -74,9 +74,16 @@ static void socket_outgoing_migration(QIOTask *task, + + if (qio_task_propagate_error(task, &err)) { + trace_migration_socket_outgoing_error(error_get_pretty(err)); +- } else { +- trace_migration_socket_outgoing_connected(data->hostname); ++ goto out; + } ++ ++ trace_migration_socket_outgoing_connected(data->hostname); ++ ++ if (migrate_use_zero_copy_send()) { ++ error_setg(&err, "Zero copy send not available in migration"); ++ } ++ ++out: + migration_channel_connect(data->s, sioc, data->hostname, err); + object_unref(OBJECT(sioc)); + } +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 634968498b..55b48d3733 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1309,6 +1309,12 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_multifd_zstd_level = true; + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); + break; ++#ifdef CONFIG_LINUX ++ case MIGRATION_PARAMETER_ZERO_COPY_SEND: ++ p->has_zero_copy_send = true; ++ visit_type_bool(v, param, &p->zero_copy_send, &err); ++ break; ++#endif + case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: + p->has_xbzrle_cache_size = true; + if (!visit_type_size(v, param, &cache_size, &err)) { +diff --git a/qapi/migration.json b/qapi/migration.json +index 27d7b28158..4d833ecdd6 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -741,6 +741,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. 
Such + # aliases may for example be the corresponding names on the +@@ -780,6 +787,7 @@ + 'xbzrle-cache-size', 'max-postcopy-bandwidth', + 'max-cpu-throttle', 'multifd-compression', + 'multifd-zlib-level' ,'multifd-zstd-level', ++ { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, + 'block-bitmap-mapping' ] } + + ## +@@ -906,6 +914,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -960,6 +975,7 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', ++ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +@@ -1106,6 +1122,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. 
Such + # aliases may for example be the corresponding names on the +@@ -1158,6 +1181,7 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', ++ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +-- +2.35.3 + diff --git a/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch b/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch new file mode 100644 index 0000000..29dc0ea --- /dev/null +++ b/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch @@ -0,0 +1,98 @@ +From fd6f516a94e635bc42e58448f314db575814a834 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 31 Mar 2022 11:08:45 -0400 +Subject: [PATCH 18/18] migration: Allow migrate-recover to run multiple times +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 104: migration: Allow migrate-recover to run multiple times +RH-Commit: [1/1] afd726e54c069ae800e2d01f34e768d6bac7dcb9 (peterx/qemu-kvm) +RH-Bugzilla: 2096143 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Hanna Reitz +RH-Acked-by: Dr. David Alan Gilbert + +Previously migration didn't have an easy way to cleanup the listening +transport, migrate recovery only allows to execute once. That's done with a +trick flag in postcopy_recover_triggered. + +Now the facility is already there. + +Drop postcopy_recover_triggered and instead allows a new migrate-recover to +release the previous listener transport. + +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Peter Xu +Message-Id: <20220331150857.74406-8-peterx@redhat.com> +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 08401c0426bc1a5ce4609afd1cda5dd39abbf9fa) +Signed-off-by: Peter Xu +--- + migration/migration.c | 13 ++----------- + migration/migration.h | 1 - + migration/savevm.c | 3 --- + 3 files changed, 2 insertions(+), 15 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 2a141bfaf3..8fb3eae910 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2166,11 +2166,8 @@ void qmp_migrate_recover(const char *uri, Error **errp) + return; + } + +- if (qatomic_cmpxchg(&mis->postcopy_recover_triggered, +- false, true) == true) { +- error_setg(errp, "Migrate recovery is triggered already"); +- return; +- } ++ /* If there's an existing transport, release it */ ++ migration_incoming_transport_cleanup(mis); + + /* + * Note that this call will never start a real migration; it will +@@ -2178,12 +2175,6 @@ void qmp_migrate_recover(const char *uri, Error **errp) + * to continue using that newly established channel. + */ + qemu_start_incoming_migration(uri, errp); +- +- /* Safe to dereference with the assert above */ +- if (*errp) { +- /* Reset the flag so user could still retry */ +- qatomic_set(&mis->postcopy_recover_triggered, false); +- } + } + + void qmp_migrate_pause(Error **errp) +diff --git a/migration/migration.h b/migration/migration.h +index c2cabb8a14..fbc8690ec8 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -139,7 +139,6 @@ struct MigrationIncomingState { + struct PostcopyBlocktimeContext *blocktime_ctx; + + /* notify PAUSED postcopy incoming migrations to try to continue */ +- bool postcopy_recover_triggered; + QemuSemaphore postcopy_pause_sem_dst; + QemuSemaphore postcopy_pause_sem_fault; + +diff --git a/migration/savevm.c b/migration/savevm.c +index 02ed94c180..d9076897b8 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -2589,9 +2589,6 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) + + assert(migrate_postcopy_ram()); + +- /* 
Clear the triggered bit to allow one recovery */ +- mis->postcopy_recover_triggered = false; +- + /* + * Unregister yank with either from/to src would work, since ioc behind it + * is the same +-- +2.35.3 + diff --git a/kvm-migration-Change-zero_copy_send-from-migration-param.patch b/kvm-migration-Change-zero_copy_send-from-migration-param.patch new file mode 100644 index 0000000..abeeeb6 --- /dev/null +++ b/kvm-migration-Change-zero_copy_send-from-migration-param.patch @@ -0,0 +1,289 @@ +From 7e2a037f3f349c21201152cecce32d8c8ff0bea0 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:45 -0300 +Subject: [PATCH 17/18] migration: Change zero_copy_send from migration + parameter to migration capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [11/11] e4a955607947896a49398ac8400241a0adac51a1 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +When originally implemented, zero_copy_send was designed as a Migration +parameter. + +But taking into account how that is supposed to work, and +the difference between a capability and a parameter, it only makes sense +that zero-copy-send would work better as a capability. + +Taking into account how recently the change got merged, it was decided +that it's still time to make it right, and convert zero_copy_send into +a Migration capability. + +Signed-off-by: Leonardo Bras +Reviewed-by: Juan Quintela +Acked-by: Markus Armbruster +Acked-by: Peter Xu +Signed-off-by: Juan Quintela +Signed-off-by: Dr.
David Alan Gilbert + dgilbert: always define the capability, even on non-Linux but error if +set; avoids build problems with the capability +(cherry picked from commit 1abaec9a1b2c23f7aa94709a422128d9e42c3e0b) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 58 +++++++++++++++++++------------------------ + monitor/hmp-cmds.c | 6 ----- + qapi/migration.json | 33 +++++++----------------- + 3 files changed, 34 insertions(+), 63 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 102236fba0..2a141bfaf3 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -163,7 +163,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, + MIGRATION_CAPABILITY_COMPRESS, + MIGRATION_CAPABILITY_XBZRLE, + MIGRATION_CAPABILITY_X_COLO, +- MIGRATION_CAPABILITY_VALIDATE_UUID); ++ MIGRATION_CAPABILITY_VALIDATE_UUID, ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND); + + /* When we add fault tolerance, we could have several + migrations at once. For now we don't need to add +@@ -899,10 +900,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->multifd_zlib_level = s->parameters.multifd_zlib_level; + params->has_multifd_zstd_level = true; + params->multifd_zstd_level = s->parameters.multifd_zstd_level; +-#ifdef CONFIG_LINUX +- params->has_zero_copy_send = true; +- params->zero_copy_send = s->parameters.zero_copy_send; +-#endif + params->has_xbzrle_cache_size = true; + params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; + params->has_max_postcopy_bandwidth = true; +@@ -1263,6 +1260,24 @@ static bool migrate_caps_check(bool *cap_list, + } + } + ++#ifdef CONFIG_LINUX ++ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && ++ (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || ++ migrate_use_compression() || ++ migrate_use_tls())) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#else ++ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { ++ 
error_setg(errp, ++ "Zero copy currently only available on Linux"); ++ return false; ++ } ++#endif ++ ++ + /* incoming side only */ + if (runstate_check(RUN_STATE_INMIGRATE) && + !migrate_multifd_is_allowed() && +@@ -1485,16 +1500,6 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); + return false; + } +-#ifdef CONFIG_LINUX +- if (params->zero_copy_send && +- (!migrate_use_multifd() || +- params->multifd_compression != MULTIFD_COMPRESSION_NONE || +- (params->tls_creds && *params->tls_creds))) { +- error_setg(errp, +- "Zero copy only available for non-compressed non-TLS multifd migration"); +- return false; +- } +-#endif + return true; + } + +@@ -1568,11 +1573,6 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } +-#ifdef CONFIG_LINUX +- if (params->has_zero_copy_send) { +- dest->zero_copy_send = params->zero_copy_send; +- } +-#endif + if (params->has_xbzrle_cache_size) { + dest->xbzrle_cache_size = params->xbzrle_cache_size; + } +@@ -1685,11 +1685,6 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } +-#ifdef CONFIG_LINUX +- if (params->has_zero_copy_send) { +- s->parameters.zero_copy_send = params->zero_copy_send; +- } +-#endif + if (params->has_xbzrle_cache_size) { + s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; + xbzrle_cache_resize(params->xbzrle_cache_size, errp); +@@ -2587,7 +2582,7 @@ bool migrate_use_zero_copy_send(void) + + s = migrate_get_current(); + +- return s->parameters.zero_copy_send; ++ return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } + #endif + +@@ -4243,10 +4238,6 @@ static Property migration_properties[] = { + 
DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, + parameters.multifd_zstd_level, + DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), +-#ifdef CONFIG_LINUX +- DEFINE_PROP_BOOL("zero_copy_send", MigrationState, +- parameters.zero_copy_send, false), +-#endif + DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, + parameters.xbzrle_cache_size, + DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), +@@ -4284,6 +4275,10 @@ static Property migration_properties[] = { + DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), + DEFINE_PROP_MIG_CAP("x-background-snapshot", + MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), ++#ifdef CONFIG_LINUX ++ DEFINE_PROP_MIG_CAP("x-zero-copy-send", ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND), ++#endif + + DEFINE_PROP_END_OF_LIST(), + }; +@@ -4344,9 +4339,6 @@ static void migration_instance_init(Object *obj) + params->has_multifd_compression = true; + params->has_multifd_zlib_level = true; + params->has_multifd_zstd_level = true; +-#ifdef CONFIG_LINUX +- params->has_zero_copy_send = true; +-#endif + params->has_xbzrle_cache_size = true; + params->has_max_postcopy_bandwidth = true; + params->has_max_cpu_throttle = true; +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 55b48d3733..634968498b 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1309,12 +1309,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_multifd_zstd_level = true; + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); + break; +-#ifdef CONFIG_LINUX +- case MIGRATION_PARAMETER_ZERO_COPY_SEND: +- p->has_zero_copy_send = true; +- visit_type_bool(v, param, &p->zero_copy_send, &err); +- break; +-#endif + case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: + p->has_xbzrle_cache_size = true; + if (!visit_type_size(v, param, &cache_size, &err)) { +diff --git a/qapi/migration.json b/qapi/migration.json +index 4d833ecdd6..5105790cd0 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -463,6 +463,13 @@ + # procedure starts. 
The VM RAM is saved with running VM. + # (since 6.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# (since 7.1) ++# + # Features: + # @unstable: Members @x-colo and @x-ignore-shared are experimental. + # +@@ -476,7 +483,8 @@ + 'block', 'return-path', 'pause-before-switchover', 'multifd', + 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', + { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, +- 'validate-uuid', 'background-snapshot'] } ++ 'validate-uuid', 'background-snapshot', ++ 'zero-copy-send'] } + + ## + # @MigrationCapabilityStatus: +@@ -741,12 +749,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. (Since 7.1) + # + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such +@@ -787,7 +789,6 @@ + 'xbzrle-cache-size', 'max-postcopy-bandwidth', + 'max-cpu-throttle', 'multifd-compression', + 'multifd-zlib-level' ,'multifd-zstd-level', +- { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, + 'block-bitmap-mapping' ] } + + ## +@@ -914,13 +915,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. 
(Since 7.1) +-# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -975,7 +969,6 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', +- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +@@ -1122,13 +1115,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. (Since 7.1) +-# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -1181,7 +1167,6 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', +- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +-- +2.35.3 + diff --git a/kvm-migration-Fix-operator-type.patch b/kvm-migration-Fix-operator-type.patch new file mode 100644 index 0000000..f6a462a --- /dev/null +++ b/kvm-migration-Fix-operator-type.patch @@ -0,0 +1,47 @@ +From 4bd48e784ae0c38c89f1a944b06c997fd28c4d37 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 19 May 2022 04:15:33 -0400 +Subject: [PATCH 16/16] migration: Fix operator type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 92: Fix build using clang 14 +RH-Commit: [1/1] ad9980e64cf2e39085d68f1ff601444bf2afe228 (mrezanin/centos-src-qemu-kvm) 
+RH-Bugzilla: 2064530 +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Clang spotted an & that should have been an &&; fix it. + +Reported by: David Binderman / https://gitlab.com/dcb +Fixes: 65dacaa04fa ("migration: introduce save_normal_page()") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/963 +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20220406102515.96320-1-dgilbert@redhat.com> +Reviewed-by: Peter Maydell +Reviewed-by: Peter Xu +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit f912ec5b2d65644116ff496b58d7c9145c19e4c0) +Signed-off-by: Miroslav Rezanina +--- + migration/ram.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 3532f64ecb..0ef4bd63eb 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1289,7 +1289,7 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + offset | RAM_SAVE_FLAG_PAGE)); + if (async) { + qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, +- migrate_release_ram() & ++ migrate_release_ram() && + migration_in_postcopy()); + } else { + qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); +-- +2.31.1 + diff --git a/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch b/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch new file mode 100644 index 0000000..ea89a9f --- /dev/null +++ b/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch @@ -0,0 +1,142 @@ +From 1d280070748b604c60a7be4d4c3c3a28e3964f37 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 2 Aug 2022 10:11:21 +0200 +Subject: [PATCH 31/32] multifd: Copy pages before compressing them with zlib + +RH-Author: Thomas Huth +RH-MergeRequest: 112: Fix postcopy migration on s390x +RH-Commit: [1/2] fd5a0221e22b4563bd1cb7f8a8b95f0bfe8f5fc9 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2099934 +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2099934 + +zlib_send_prepare() compresses pages of a running VM. zlib does not +make any thread-safety guarantees with respect to changing deflate() +input concurrently with deflate() [1]. + +One can observe problems due to this with the IBM zEnterprise Data +Compression accelerator capable zlib [2]. When the hardware +acceleration is enabled, migration/multifd/tcp/plain/zlib test fails +intermittently [3] due to sliding window corruption. The accelerator's +architecture explicitly discourages concurrent accesses [4]: + + Page 26-57, "Other Conditions": + + As observed by this CPU, other CPUs, and channel + programs, references to the parameter block, first, + second, and third operands may be multiple-access + references, accesses to these storage locations are + not necessarily block-concurrent, and the sequence + of these accesses or references is undefined. + +Mark Adler pointed out that vanilla zlib performs double fetches under +certain circumstances as well [5], therefore we need to copy data +before passing it to deflate(). + +[1] https://zlib.net/manual.html +[2] https://github.com/madler/zlib/pull/410 +[3] https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03988.html +[4] http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf +[5] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00889.html + +Signed-off-by: Ilya Leoshkevich +Message-Id: <20220705203559.2960949-1-iii@linux.ibm.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 007e179ef0e97eafda4c9ff2a9d665a1947c7c6d) +Signed-off-by: Thomas Huth +--- + migration/multifd-zlib.c | 38 ++++++++++++++++++++++++++++++-------- + 1 file changed, 30 insertions(+), 8 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 3a7ae44485..18213a9513 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -27,6 +27,8 @@ struct zlib_data { + uint8_t *zbuff; + /* size of compressed buffer */ + uint32_t zbuff_len; ++ /* uncompressed buffer of size qemu_target_page_size() */ ++ uint8_t *buf; + }; + + /* Multifd zlib compression */ +@@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + { + struct zlib_data *z = g_new0(struct zlib_data, 1); + z_stream *zs = &z->zs; ++ const char *err_msg; + + zs->zalloc = Z_NULL; + zs->zfree = Z_NULL; + zs->opaque = Z_NULL; + if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) { +- g_free(z); +- error_setg(errp, "multifd %u: deflate init failed", p->id); +- return -1; ++ err_msg = "deflate init failed"; ++ goto err_free_z; + } + /* This is the maxium size of the compressed buffer */ + z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE); + z->zbuff = g_try_malloc(z->zbuff_len); + if (!z->zbuff) { +- deflateEnd(&z->zs); +- g_free(z); +- error_setg(errp, "multifd %u: out of memory for zbuff", p->id); +- return -1; ++ err_msg = "out of memory for zbuff"; ++ goto err_deflate_end; ++ } ++ z->buf = g_try_malloc(qemu_target_page_size()); ++ if (!z->buf) { ++ err_msg = "out of memory for buf"; ++ goto err_free_zbuff; + } + p->data = z; + return 0; ++ ++err_free_zbuff: ++ g_free(z->zbuff); ++err_deflate_end: ++ deflateEnd(&z->zs); ++err_free_z: ++ g_free(z); ++ error_setg(errp, "multifd %u: %s", p->id, err_msg); ++ return -1; + } + + /** +@@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + deflateEnd(&z->zs); + g_free(z->zbuff); + z->zbuff = NULL; ++ g_free(z->buf); ++ 
z->buf = NULL; + g_free(p->data); + p->data = NULL; + } +@@ -114,8 +130,14 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + flush = Z_SYNC_FLUSH; + } + ++ /* ++ * Since the VM might be running, the page may be changing concurrently ++ * with compression. zlib does not guarantee that this is safe, ++ * therefore copy the page before calling deflate(). ++ */ ++ memcpy(z->buf, p->pages->block->host + p->normal[i], page_size); + zs->avail_in = page_size; +- zs->next_in = p->pages->block->host + p->normal[i]; ++ zs->next_in = z->buf; + + zs->avail_out = available; + zs->next_out = z->zbuff + out_size; +-- +2.31.1 + diff --git a/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch b/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch new file mode 100644 index 0000000..c7159e1 --- /dev/null +++ b/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch @@ -0,0 +1,182 @@ +From c1a2866d158ac67179fa0d17f1710302eb9a3866 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:37 -0300 +Subject: [PATCH 14/18] multifd: Implement zero copy write in multifd migration + (multifd-zero-copy) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [8/11] b93009cc94b2cc4b464b4f68ebfb37b870dd6f7d (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Implement zero copy send on nocomp_send_write(), by making use of QIOChannel +writev + flags & flush interface. + +Change multifd_send_sync_main() so flush_zero_copy() can be called +after each iteration in order to make sure all dirty pages are sent before +a new iteration is started. It will also flush at the beginning and at the +end of migration. 
+ +Also make it return -1 if flush_zero_copy() fails, in order to cancel +the migration process, and avoid resuming the guest in the target host +without receiving all current RAM. + +This will work fine on RAM migration because the RAM pages are not usually freed, +and there is no problem on changing the pages content between writev_zero_copy() and +the actual sending of the buffer, because this change will dirty the page and +cause it to be re-sent on a next iteration anyway. + +A lot of locked memory may be needed in order to use multifd migration +with zero-copy enabled, so disabling the feature should be necessary for +low-privileged users trying to perform multifd migrations. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-9-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 5b1d9bab2da4fca3a3caee97c430e5709cb32b7b) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 11 ++++++++++- + migration/multifd.c | 37 +++++++++++++++++++++++++++++++++++-- + migration/multifd.h | 2 ++ + migration/socket.c | 5 +++-- + 4 files changed, 50 insertions(+), 5 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index d91efb66fe..102236fba0 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1485,7 +1485,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); + return false; + } +- ++#ifdef CONFIG_LINUX ++ if (params->zero_copy_send && ++ (!migrate_use_multifd() || ++ params->multifd_compression != MULTIFD_COMPRESSION_NONE || ++ (params->tls_creds && *params->tls_creds))) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#endif + return true; + } + +diff --git a/migration/multifd.c b/migration/multifd.c +index 8fca6c970e..0b5b41c53f 100644 +--- 
a/migration/multifd.c ++++ b/migration/multifd.c +@@ -571,6 +571,7 @@ void multifd_save_cleanup(void) + int multifd_send_sync_main(QEMUFile *f) + { + int i; ++ bool flush_zero_copy; + + if (!migrate_use_multifd()) { + return 0; +@@ -581,6 +582,20 @@ int multifd_send_sync_main(QEMUFile *f) + return -1; + } + } ++ ++ /* ++ * When using zero-copy, it's necessary to flush the pages before any of ++ * the pages can be sent again, so we'll make sure the new version of the ++ * pages will always arrive _later_ than the old pages. ++ * ++ * Currently we achieve this by flushing the zero-page requested writes ++ * per ram iteration, but in the future we could potentially optimize it ++ * to be less frequent, e.g. only after we finished one whole scanning of ++ * all the dirty bitmaps. ++ */ ++ ++ flush_zero_copy = migrate_use_zero_copy_send(); ++ + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -602,6 +617,17 @@ int multifd_send_sync_main(QEMUFile *f) + ram_counters.transferred += p->packet_len; + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); ++ ++ if (flush_zero_copy && p->c) { ++ int ret; ++ Error *err = NULL; ++ ++ ret = qio_channel_flush(p->c, &err); ++ if (ret < 0) { ++ error_report_err(err); ++ return -1; ++ } ++ } + } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +@@ -686,8 +712,8 @@ static void *multifd_send_thread(void *opaque) + p->iov[0].iov_base = p->packet; + } + +- ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, +- &local_err); ++ ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, ++ 0, p->write_flags, &local_err); + if (ret != 0) { + break; + } +@@ -928,6 +954,13 @@ int multifd_save_setup(Error **errp) + /* We need one extra place for the packet header */ + p->iov = g_new0(struct iovec, page_count + 1); + p->normal = g_new0(ram_addr_t, page_count); ++ ++ if (migrate_use_zero_copy_send()) { ++ 
p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; ++ } else { ++ p->write_flags = 0; ++ } ++ + socket_send_channel_create(multifd_new_send_channel_async, p); + } + +diff --git a/migration/multifd.h b/migration/multifd.h +index cd495195ce..7ec688fb4f 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -96,6 +96,8 @@ typedef struct { + uint32_t packet_len; + /* pointer to the packet */ + MultiFDPacket_t *packet; ++ /* multifd flags for sending ram */ ++ int write_flags; + /* multifd flags for each packet */ + uint32_t flags; + /* size of the next packet that contains pages */ +diff --git a/migration/socket.c b/migration/socket.c +index 3754d8f72c..4fd5e85f50 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -79,8 +79,9 @@ static void socket_outgoing_migration(QIOTask *task, + + trace_migration_socket_outgoing_connected(data->hostname); + +- if (migrate_use_zero_copy_send()) { +- error_setg(&err, "Zero copy send not available in migration"); ++ if (migrate_use_zero_copy_send() && ++ !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { ++ error_setg(&err, "Zero copy send feature not detected in host kernel"); + } + + out: +-- +2.35.3 + diff --git a/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch b/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch new file mode 100644 index 0000000..415e3a9 --- /dev/null +++ b/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch @@ -0,0 +1,102 @@ +From 63255c13492f42a3236d96e706e5f8e70bb4e219 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:36 -0300 +Subject: [PATCH 13/18] multifd: Send header packet without flags if + zero-copy-send is enabled +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [7/11] 137eea685e387d3d6aff187ec3fcac05bc16b6e3 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: 
Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Since d48c3a0445 ("multifd: Use a single writev on the send side"), +sending the header packet and the memory pages happens in the same +writev, which can potentially make the migration faster. + +Using channel-socket as example, this works well with the default copying +mechanism of sendmsg(), but with zero-copy-send=true, it will cause +the migration to often break. + +This happens because the header packet buffer gets reused quite often, +and there is a high chance that by the time the MSG_ZEROCOPY mechanism get +to send the buffer, it has already changed, sending the wrong data and +causing the migration to abort. + +It means that, as it is, the buffer for the header packet is not suitable +for sending with MSG_ZEROCOPY. + +In order to enable zero copy for multifd, send the header packet on an +individual write(), without any flags, and the remanining pages with a +writev(), as it was happening before. This only changes how a migration +with zero-copy-send=true works, not changing any current behavior for +migrations with zero-copy-send=false. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-8-leobras@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit b7dbdd8e76cd03453c234dbb9578d20969859d74) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 22 +++++++++++++++++++--- + 1 file changed, 19 insertions(+), 3 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index cdb57439a7..8fca6c970e 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -619,6 +619,7 @@ static void *multifd_send_thread(void *opaque) + MultiFDSendParams *p = opaque; + Error *local_err = NULL; + int ret = 0; ++ bool use_zero_copy_send = migrate_use_zero_copy_send(); + + trace_multifd_send_thread_start(p->id); + rcu_register_thread(); +@@ -641,9 +642,14 @@ static void *multifd_send_thread(void *opaque) + if (p->pending_job) { + uint64_t packet_num = p->packet_num; + uint32_t flags = p->flags; +- p->iovs_num = 1; + p->normal_num = 0; + ++ if (use_zero_copy_send) { ++ p->iovs_num = 0; ++ } else { ++ p->iovs_num = 1; ++ } ++ + for (int i = 0; i < p->pages->num; i++) { + p->normal[p->normal_num] = p->pages->offset[i]; + p->normal_num++; +@@ -667,8 +673,18 @@ static void *multifd_send_thread(void *opaque) + trace_multifd_send(p->id, packet_num, p->normal_num, flags, + p->next_packet_size); + +- p->iov[0].iov_len = p->packet_len; +- p->iov[0].iov_base = p->packet; ++ if (use_zero_copy_send) { ++ /* Send header first, without zerocopy */ ++ ret = qio_channel_write_all(p->c, (void *)p->packet, ++ p->packet_len, &local_err); ++ if (ret != 0) { ++ break; ++ } ++ } else { ++ /* Send header using the same writev call */ ++ p->iov[0].iov_len = p->packet_len; ++ p->iov[0].iov_base = p->packet; ++ } + + ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, + &local_err); +-- +2.35.3 + diff --git a/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch b/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch new file mode 100644 index 0000000..e6d726a --- /dev/null +++ b/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch @@ -0,0 +1,163 @@ +From 
4ca5375a936bc87829c6e2b4620f56c73a5efc70 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:35 -0300 +Subject: [PATCH 12/18] multifd: multifd_send_sync_main now returns negative on + error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [6/11] c8ebdee4327d463c74f4b2eeb42d3c964f314c94 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Even though multifd_send_sync_main() currently emits error_reports, it's +callers don't really check it before continuing. + +Change multifd_send_sync_main() to return -1 on error and 0 on success. +Also change all it's callers to make use of this change and possibly fail +earlier. + +(This change is important to next patch on multifd zero copy +implementation, to make it sure an error in zero-copy flush does not go +unnoticed. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Message-Id: <20220513062836.965425-7-leobras@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 33d70973a3a6e8c6b62bcbc64d9e488961981007) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 10 ++++++---- + migration/multifd.h | 2 +- + migration/ram.c | 29 ++++++++++++++++++++++------- + 3 files changed, 29 insertions(+), 12 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 43998ad117..cdb57439a7 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -568,17 +568,17 @@ void multifd_save_cleanup(void) + multifd_send_state = NULL; + } + +-void multifd_send_sync_main(QEMUFile *f) ++int multifd_send_sync_main(QEMUFile *f) + { + int i; + + if (!migrate_use_multifd()) { +- return; ++ return 0; + } + if (multifd_send_state->pages->num) { + if (multifd_send_pages(f) < 0) { + error_report("%s: multifd_send_pages fail", __func__); +- return; ++ return -1; + } + } + for (i = 0; i < migrate_multifd_channels(); i++) { +@@ -591,7 +591,7 @@ void multifd_send_sync_main(QEMUFile *f) + if (p->quit) { + error_report("%s: channel %d has already quit", __func__, i); + qemu_mutex_unlock(&p->mutex); +- return; ++ return -1; + } + + p->packet_num = multifd_send_state->packet_num++; +@@ -610,6 +610,8 @@ void multifd_send_sync_main(QEMUFile *f) + qemu_sem_wait(&p->sem_sync); + } + trace_multifd_send_sync_main(multifd_send_state->packet_num); ++ ++ return 0; + } + + static void *multifd_send_thread(void *opaque) +diff --git a/migration/multifd.h b/migration/multifd.h +index 4dda900a0b..cd495195ce 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -22,7 +22,7 @@ int multifd_load_cleanup(Error **errp); + bool multifd_recv_all_channels_created(void); + bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); +-void multifd_send_sync_main(QEMUFile *f); ++int multifd_send_sync_main(QEMUFile *f); + int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); + + /* Multifd Compression flags */ +diff --git a/migration/ram.c 
b/migration/ram.c +index 0ef4bd63eb..fb6db54642 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2903,6 +2903,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + { + RAMState **rsp = opaque; + RAMBlock *block; ++ int ret; + + if (compress_threads_save_setup()) { + return -1; +@@ -2937,7 +2938,11 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + ram_control_before_iterate(f, RAM_CONTROL_SETUP); + ram_control_after_iterate(f, RAM_CONTROL_SETUP); + +- multifd_send_sync_main(f); ++ ret = multifd_send_sync_main(f); ++ if (ret < 0) { ++ return ret; ++ } ++ + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + +@@ -3046,7 +3051,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + out: + if (ret >= 0 + && migration_is_setup_or_active(migrate_get_current()->state)) { +- multifd_send_sync_main(rs->f); ++ ret = multifd_send_sync_main(rs->f); ++ if (ret < 0) { ++ return ret; ++ } ++ + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + ram_transferred_add(8); +@@ -3106,13 +3115,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_FINISH); + } + +- if (ret >= 0) { +- multifd_send_sync_main(rs->f); +- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +- qemu_fflush(f); ++ if (ret < 0) { ++ return ret; + } + +- return ret; ++ ret = multifd_send_sync_main(rs->f); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); ++ qemu_fflush(f); ++ ++ return 0; + } + + static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, +-- +2.35.3 + diff --git a/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch b/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch new file mode 100644 index 0000000..56abcb1 --- /dev/null +++ b/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch @@ -0,0 +1,381 @@ +From 4a9ddf42788d3f924bdad7746f7aca615f03d7c1 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 11 May 2022 19:49:24 -0500 +Subject: [PATCH 
2/2] nbd/server: Allow MULTI_CONN for shared writable exports +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 90: Advertise MULTI_CONN on writeable NBD servers +RH-Commit: [2/2] 53f0e885a5ed7f6e4bb14e74fe8e7957e6afe90f (ebblake/centos-qemu-kvm) +RH-Bugzilla: 1708300 +RH-Acked-by: Nir Soffer +RH-Acked-by: Kevin Wolf +RH-Acked-by: Daniel P. Berrangé + +According to the NBD spec, a server that advertises +NBD_FLAG_CAN_MULTI_CONN promises that multiple client connections will +not see any cache inconsistencies: when properly separated by a single +flush, actions performed by one client will be visible to another +client, regardless of which client did the flush. + +We always satisfy these conditions in qemu - even when we support +multiple clients, ALL clients go through a single point of reference +into the block layer, with no local caching. The effect of one client +is instantly visible to the next client. Even if our backend were a +network device, we argue that any multi-path caching effects that +would cause inconsistencies in back-to-back actions not seeing the +effect of previous actions would be a bug in that backend, and not the +fault of caching in qemu. As such, it is safe to unconditionally +advertise CAN_MULTI_CONN for any qemu NBD server situation that +supports parallel clients. + +Note, however, that we don't want to advertise CAN_MULTI_CONN when we +know that a second client cannot connect (for historical reasons, +qemu-nbd defaults to a single connection while nbd-server-add and QMP +commands default to unlimited connections; but we already have +existing means to let either style of NBD server creation alter those +defaults). This is visible by no longer advertising MULTI_CONN for +'qemu-nbd -r' without -e, as in the iotest nbd-qemu-allocation. 
+ +The harder part of this patch is setting up an iotest to demonstrate +behavior of multiple NBD clients to a single server. It might be +possible with parallel qemu-io processes, but I found it easier to do +in python with the help of libnbd, and help from Nir and Vladimir in +writing the test. + +Signed-off-by: Eric Blake +Suggested-by: Nir Soffer +Suggested-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20220512004924.417153-3-eblake@redhat.com> +Signed-off-by: Kevin Wolf + +(cherry picked from commit 58a6fdcc9efb2a7c1ef4893dca4aa5e8020ca3dc) +Conflicts: + nbd/server.c - context, e5fb29d5 not backported +Signed-off-by: Eric Blake +--- + MAINTAINERS | 1 + + blockdev-nbd.c | 5 + + docs/interop/nbd.txt | 1 + + docs/tools/qemu-nbd.rst | 3 +- + include/block/nbd.h | 3 +- + nbd/server.c | 10 +- + qapi/block-export.json | 8 +- + tests/qemu-iotests/tests/nbd-multiconn | 145 ++++++++++++++++++ + tests/qemu-iotests/tests/nbd-multiconn.out | 5 + + .../tests/nbd-qemu-allocation.out | 2 +- + 10 files changed, 172 insertions(+), 11 deletions(-) + create mode 100755 tests/qemu-iotests/tests/nbd-multiconn + create mode 100644 tests/qemu-iotests/tests/nbd-multiconn.out + +diff --git a/MAINTAINERS b/MAINTAINERS +index 4ad2451e03..2fe20a49ab 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -3370,6 +3370,7 @@ F: qemu-nbd.* + F: blockdev-nbd.c + F: docs/interop/nbd.txt + F: docs/tools/qemu-nbd.rst ++F: tests/qemu-iotests/tests/*nbd* + T: git https://repo.or.cz/qemu/ericb.git nbd + T: git https://src.openvz.org/scm/~vsementsov/qemu.git nbd + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index add41a23af..c6d9b0324c 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -44,6 +44,11 @@ bool nbd_server_is_running(void) + return nbd_server || qemu_nbd_connections >= 0; + } + ++int nbd_server_max_connections(void) ++{ ++ return nbd_server ? 
nbd_server->max_connections : qemu_nbd_connections; ++} ++ + static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) + { + nbd_client_put(client); +diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt +index bdb0f2a41a..f5ca25174a 100644 +--- a/docs/interop/nbd.txt ++++ b/docs/interop/nbd.txt +@@ -68,3 +68,4 @@ NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE + * 4.2: NBD_FLAG_CAN_MULTI_CONN for shareable read-only exports, + NBD_CMD_FLAG_FAST_ZERO + * 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth" ++* 7.1: NBD_FLAG_CAN_MULTI_CONN for shareable writable exports +diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst +index 4c950f6199..8e08a29e89 100644 +--- a/docs/tools/qemu-nbd.rst ++++ b/docs/tools/qemu-nbd.rst +@@ -139,8 +139,7 @@ driver options if :option:`--image-opts` is specified. + .. option:: -e, --shared=NUM + + Allow up to *NUM* clients to share the device (default +- ``1``), 0 for unlimited. Safe for readers, but for now, +- consistency is not guaranteed between multiple writers. ++ ``1``), 0 for unlimited. + + .. option:: -t, --persistent + +diff --git a/include/block/nbd.h b/include/block/nbd.h +index c5a29ce1c6..c74b7a9d2e 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2016-2020 Red Hat, Inc. ++ * Copyright (C) 2016-2022 Red Hat, Inc. + * Copyright (C) 2005 Anthony Liguori + * + * Network Block Device +@@ -346,6 +346,7 @@ void nbd_client_put(NBDClient *client); + + void nbd_server_is_qemu_nbd(int max_connections); + bool nbd_server_is_running(void); ++int nbd_server_max_connections(void); + void nbd_server_start(SocketAddress *addr, const char *tls_creds, + const char *tls_authz, uint32_t max_connections, + Error **errp); +diff --git a/nbd/server.c b/nbd/server.c +index c5644fd3f6..6e2157acfa 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2016-2021 Red Hat, Inc. ++ * Copyright (C) 2016-2022 Red Hat, Inc. 
+ * Copyright (C) 2005 Anthony Liguori + * + * Network Block Device Server Side +@@ -1642,7 +1642,6 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, + int64_t size; + uint64_t perm, shared_perm; + bool readonly = !exp_args->writable; +- bool shared = !exp_args->writable; + strList *bitmaps; + size_t i; + int ret; +@@ -1693,11 +1692,12 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, + exp->description = g_strdup(arg->description); + exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH | + NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE); ++ ++ if (nbd_server_max_connections() != 1) { ++ exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN; ++ } + if (readonly) { + exp->nbdflags |= NBD_FLAG_READ_ONLY; +- if (shared) { +- exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN; +- } + } else { + exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES | + NBD_FLAG_SEND_FAST_ZERO); +diff --git a/qapi/block-export.json b/qapi/block-export.json +index 1e34927f85..755ccc89b1 100644 +--- a/qapi/block-export.json ++++ b/qapi/block-export.json +@@ -21,7 +21,9 @@ + # recreated on the fly while the NBD server is active. + # If missing, it will default to denying access (since 4.0). + # @max-connections: The maximum number of connections to allow at the same +-# time, 0 for unlimited. (since 5.2; default: 0) ++# time, 0 for unlimited. Setting this to 1 also stops ++# the server from advertising multiple client support ++# (since 5.2; default: 0) + # + # Since: 4.2 + ## +@@ -50,7 +52,9 @@ + # recreated on the fly while the NBD server is active. + # If missing, it will default to denying access (since 4.0). + # @max-connections: The maximum number of connections to allow at the same +-# time, 0 for unlimited. (since 5.2; default: 0) ++# time, 0 for unlimited. Setting this to 1 also stops ++# the server from advertising multiple client support ++# (since 5.2; default: 0). + # + # Returns: error if the server is already running. 
+ # +diff --git a/tests/qemu-iotests/tests/nbd-multiconn b/tests/qemu-iotests/tests/nbd-multiconn +new file mode 100755 +index 0000000000..b121f2e363 +--- /dev/null ++++ b/tests/qemu-iotests/tests/nbd-multiconn +@@ -0,0 +1,145 @@ ++#!/usr/bin/env python3 ++# group: rw auto quick ++# ++# Test cases for NBD multi-conn advertisement ++# ++# Copyright (C) 2022 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++ ++import os ++from contextlib import contextmanager ++import iotests ++from iotests import qemu_img_create, qemu_io ++ ++ ++disk = os.path.join(iotests.test_dir, 'disk') ++size = '4M' ++nbd_sock = os.path.join(iotests.sock_dir, 'nbd_sock') ++nbd_uri = 'nbd+unix:///{}?socket=' + nbd_sock ++ ++ ++@contextmanager ++def open_nbd(export_name): ++ h = nbd.NBD() ++ try: ++ h.connect_uri(nbd_uri.format(export_name)) ++ yield h ++ finally: ++ h.shutdown() ++ ++class TestNbdMulticonn(iotests.QMPTestCase): ++ def setUp(self): ++ qemu_img_create('-f', iotests.imgfmt, disk, size) ++ qemu_io('-c', 'w -P 1 0 2M', '-c', 'w -P 2 2M 2M', disk) ++ ++ self.vm = iotests.VM() ++ self.vm.launch() ++ result = self.vm.qmp('blockdev-add', { ++ 'driver': 'qcow2', ++ 'node-name': 'n', ++ 'file': {'driver': 'file', 'filename': disk} ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ def tearDown(self): ++ self.vm.shutdown() ++ os.remove(disk) ++ try: ++ os.remove(nbd_sock) ++ except OSError: ++ pass ++ ++ @contextmanager ++ def run_server(self, max_connections=None): ++ args = { ++ 'addr': { ++ 'type': 'unix', ++ 'data': {'path': nbd_sock} ++ } ++ } ++ if max_connections is not None: ++ args['max-connections'] = max_connections ++ ++ result = self.vm.qmp('nbd-server-start', args) ++ self.assert_qmp(result, 'return', {}) ++ yield ++ ++ result = self.vm.qmp('nbd-server-stop') ++ self.assert_qmp(result, 'return', {}) ++ ++ def add_export(self, name, writable=None): ++ args = { ++ 'type': 'nbd', ++ 'id': name, ++ 'node-name': 'n', ++ 'name': name, ++ } ++ if writable is not None: ++ args['writable'] = writable ++ ++ result = self.vm.qmp('block-export-add', args) ++ self.assert_qmp(result, 'return', {}) ++ ++ def test_default_settings(self): ++ with self.run_server(): ++ self.add_export('r') ++ self.add_export('w', writable=True) ++ with open_nbd('r') as h: ++ self.assertTrue(h.can_multi_conn()) ++ with open_nbd('w') as h: ++ self.assertTrue(h.can_multi_conn()) ++ ++ def 
test_limited_connections(self): ++ with self.run_server(max_connections=1): ++ self.add_export('r') ++ self.add_export('w', writable=True) ++ with open_nbd('r') as h: ++ self.assertFalse(h.can_multi_conn()) ++ with open_nbd('w') as h: ++ self.assertFalse(h.can_multi_conn()) ++ ++ def test_parallel_writes(self): ++ with self.run_server(): ++ self.add_export('w', writable=True) ++ ++ clients = [nbd.NBD() for _ in range(3)] ++ for c in clients: ++ c.connect_uri(nbd_uri.format('w')) ++ self.assertTrue(c.can_multi_conn()) ++ ++ initial_data = clients[0].pread(1024 * 1024, 0) ++ self.assertEqual(initial_data, b'\x01' * 1024 * 1024) ++ ++ updated_data = b'\x03' * 1024 * 1024 ++ clients[1].pwrite(updated_data, 0) ++ clients[2].flush() ++ current_data = clients[0].pread(1024 * 1024, 0) ++ ++ self.assertEqual(updated_data, current_data) ++ ++ for i in range(3): ++ clients[i].shutdown() ++ ++ ++if __name__ == '__main__': ++ try: ++ # Easier to use libnbd than to try and set up parallel ++ # 'qemu-nbd --list' or 'qemu-io' processes, but not all systems ++ # have libnbd installed. ++ import nbd # type: ignore ++ ++ iotests.main(supported_fmts=['qcow2']) ++ except ImportError: ++ iotests.notrun('libnbd not installed') +diff --git a/tests/qemu-iotests/tests/nbd-multiconn.out b/tests/qemu-iotests/tests/nbd-multiconn.out +new file mode 100644 +index 0000000000..8d7e996700 +--- /dev/null ++++ b/tests/qemu-iotests/tests/nbd-multiconn.out +@@ -0,0 +1,5 @@ ++... 
++---------------------------------------------------------------------- ++Ran 3 tests ++ ++OK +diff --git a/tests/qemu-iotests/tests/nbd-qemu-allocation.out b/tests/qemu-iotests/tests/nbd-qemu-allocation.out +index 0bf1abb063..9d938db24e 100644 +--- a/tests/qemu-iotests/tests/nbd-qemu-allocation.out ++++ b/tests/qemu-iotests/tests/nbd-qemu-allocation.out +@@ -17,7 +17,7 @@ wrote 2097152/2097152 bytes at offset 1048576 + exports available: 1 + export: '' + size: 4194304 +- flags: 0x58f ( readonly flush fua df multi cache ) ++ flags: 0x48f ( readonly flush fua df cache ) + min block: 1 + opt block: 4096 + max block: 33554432 +-- +2.31.1 + diff --git a/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch b/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch new file mode 100644 index 0000000..68f7647 --- /dev/null +++ b/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch @@ -0,0 +1,287 @@ +From 35bf6693fb5bba5a9d5fdf4a7fdac06ce574b83d Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:05 -0400 +Subject: [PATCH 1/7] numa: Enable numa for SGX EPC sections + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [1/5] c29297cbacc4cb65c9ac125db349a767aa2574af +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +The basic SGX did not enable numa for SGX EPC sections, which +result in all EPC sections located in numa node 0. This patch +enable SGX numa function in the guest and the EPC section can +work with RAM as one numa node. + +The Guest kernel related log: +[ 0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x180000000-0x183ffffff] +[ 0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x184000000-0x185bfffff] +The SRAT table can normally show SGX EPC sections menory info in different +numa nodes. + +The SGX EPC numa related command: + ...... 
+ -m 4G,maxmem=20G \ + -smp sockets=2,cores=2 \ + -cpu host,+sgx-provisionkey \ + -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \ + -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \ + -numa node,nodeid=0,cpus=0-1,memdev=node0 \ + -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \ + -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \ + -numa node,nodeid=1,cpus=2-3,memdev=node1 \ + -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 \ + ...... + +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-2-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 1105812382e1126d86dddc16b3700f8c79dc93d1) +Signed-off-by: Paul Lai +--- + hw/core/numa.c | 5 ++--- + hw/i386/acpi-build.c | 2 ++ + hw/i386/sgx-epc.c | 3 +++ + hw/i386/sgx-stub.c | 4 ++++ + hw/i386/sgx.c | 44 +++++++++++++++++++++++++++++++++++++++ + include/hw/i386/sgx-epc.h | 3 +++ + monitor/hmp-cmds.c | 1 + + qapi/machine.json | 10 ++++++++- + qemu-options.hx | 4 ++-- + 9 files changed, 70 insertions(+), 6 deletions(-) + +diff --git a/hw/core/numa.c b/hw/core/numa.c +index e6050b2273..1aa05dcf42 100644 +--- a/hw/core/numa.c ++++ b/hw/core/numa.c +@@ -784,9 +784,8 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) + break; + case MEMORY_DEVICE_INFO_KIND_SGX_EPC: + se = value->u.sgx_epc.data; +- /* TODO: once we support numa, assign to right node */ +- node_mem[0].node_mem += se->size; +- node_mem[0].node_plugged_mem += se->size; ++ node_mem[se->node].node_mem += se->size; ++ node_mem[se->node].node_plugged_mem = 0; + break; + default: + g_assert_not_reached(); +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index 447ea35275..a4478e77b7 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -2071,6 +2071,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) + nvdimm_build_srat(table_data); + 
} + ++ sgx_epc_build_srat(table_data); ++ + /* + * TODO: this part is not in ACPI spec and current linux kernel boots fine + * without these entries. But I recall there were issues the last time I +diff --git a/hw/i386/sgx-epc.c b/hw/i386/sgx-epc.c +index e508827e78..96b2940d75 100644 +--- a/hw/i386/sgx-epc.c ++++ b/hw/i386/sgx-epc.c +@@ -21,6 +21,7 @@ + + static Property sgx_epc_properties[] = { + DEFINE_PROP_UINT64(SGX_EPC_ADDR_PROP, SGXEPCDevice, addr, 0), ++ DEFINE_PROP_UINT32(SGX_EPC_NUMA_NODE_PROP, SGXEPCDevice, node, 0), + DEFINE_PROP_LINK(SGX_EPC_MEMDEV_PROP, SGXEPCDevice, hostmem, + TYPE_MEMORY_BACKEND_EPC, HostMemoryBackendEpc *), + DEFINE_PROP_END_OF_LIST(), +@@ -139,6 +140,8 @@ static void sgx_epc_md_fill_device_info(const MemoryDeviceState *md, + se->memaddr = epc->addr; + se->size = object_property_get_uint(OBJECT(epc), SGX_EPC_SIZE_PROP, + NULL); ++ se->node = object_property_get_uint(OBJECT(epc), SGX_EPC_NUMA_NODE_PROP, ++ NULL); + se->memdev = object_get_canonical_path(OBJECT(epc->hostmem)); + + info->u.sgx_epc.data = se; +diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c +index c9b379e665..26833eb233 100644 +--- a/hw/i386/sgx-stub.c ++++ b/hw/i386/sgx-stub.c +@@ -6,6 +6,10 @@ + #include "qapi/error.h" + #include "qapi/qapi-commands-misc-target.h" + ++void sgx_epc_build_srat(GArray *table_data) ++{ ++} ++ + SGXInfo *qmp_query_sgx(Error **errp) + { + error_setg(errp, "SGX support is not compiled in"); +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index 8fef3dd8fa..d04299904a 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -23,6 +23,7 @@ + #include "sysemu/hw_accel.h" + #include "sysemu/reset.h" + #include ++#include "hw/acpi/aml-build.h" + + #define SGX_MAX_EPC_SECTIONS 8 + #define SGX_CPUID_EPC_INVALID 0x0 +@@ -36,6 +37,46 @@ + + #define RETRY_NUM 2 + ++static int sgx_epc_device_list(Object *obj, void *opaque) ++{ ++ GSList **list = opaque; ++ ++ if (object_dynamic_cast(obj, TYPE_SGX_EPC)) { ++ *list = g_slist_append(*list, DEVICE(obj)); ++ 
} ++ ++ object_child_foreach(obj, sgx_epc_device_list, opaque); ++ return 0; ++} ++ ++static GSList *sgx_epc_get_device_list(void) ++{ ++ GSList *list = NULL; ++ ++ object_child_foreach(qdev_get_machine(), sgx_epc_device_list, &list); ++ return list; ++} ++ ++void sgx_epc_build_srat(GArray *table_data) ++{ ++ GSList *device_list = sgx_epc_get_device_list(); ++ ++ for (; device_list; device_list = device_list->next) { ++ DeviceState *dev = device_list->data; ++ Object *obj = OBJECT(dev); ++ uint64_t addr, size; ++ int node; ++ ++ node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, ++ &error_abort); ++ addr = object_property_get_uint(obj, SGX_EPC_ADDR_PROP, &error_abort); ++ size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, &error_abort); ++ ++ build_srat_memory(table_data, addr, size, node, MEM_AFFINITY_ENABLED); ++ } ++ g_slist_free(device_list); ++} ++ + static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + { + return (low & MAKE_64BIT_MASK(12, 20)) + +@@ -226,6 +267,9 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms) + /* set the memdev link with memory backend */ + object_property_parse(obj, SGX_EPC_MEMDEV_PROP, list->value->memdev, + &error_fatal); ++ /* set the numa node property for sgx epc object */ ++ object_property_set_uint(obj, SGX_EPC_NUMA_NODE_PROP, list->value->node, ++ &error_fatal); + object_property_set_bool(obj, "realized", true, &error_fatal); + object_unref(obj); + } +diff --git a/include/hw/i386/sgx-epc.h b/include/hw/i386/sgx-epc.h +index a6a65be854..581fac389a 100644 +--- a/include/hw/i386/sgx-epc.h ++++ b/include/hw/i386/sgx-epc.h +@@ -25,6 +25,7 @@ + #define SGX_EPC_ADDR_PROP "addr" + #define SGX_EPC_SIZE_PROP "size" + #define SGX_EPC_MEMDEV_PROP "memdev" ++#define SGX_EPC_NUMA_NODE_PROP "node" + + /** + * SGXEPCDevice: +@@ -38,6 +39,7 @@ typedef struct SGXEPCDevice { + + /* public */ + uint64_t addr; ++ uint32_t node; + HostMemoryBackendEpc *hostmem; + } SGXEPCDevice; + +@@ -56,6 +58,7 @@ typedef struct 
SGXEPCState { + } SGXEPCState; + + bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size); ++void sgx_epc_build_srat(GArray *table_data); + + static inline uint64_t sgx_epc_above_4g_end(SGXEPCState *sgx_epc) + { +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 9c91bf93e9..2669156b28 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1810,6 +1810,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) + se->id ? se->id : ""); + monitor_printf(mon, " memaddr: 0x%" PRIx64 "\n", se->memaddr); + monitor_printf(mon, " size: %" PRIu64 "\n", se->size); ++ monitor_printf(mon, " node: %" PRId64 "\n", se->node); + monitor_printf(mon, " memdev: %s\n", se->memdev); + break; + default: +diff --git a/qapi/machine.json b/qapi/machine.json +index 067e3f5378..16e771affc 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -1207,12 +1207,15 @@ + # + # @memdev: memory backend linked with device + # ++# @node: the numa node ++# + # Since: 6.2 + ## + { 'struct': 'SgxEPCDeviceInfo', + 'data': { '*id': 'str', + 'memaddr': 'size', + 'size': 'size', ++ 'node': 'int', + 'memdev': 'str' + } + } +@@ -1285,10 +1288,15 @@ + # + # @memdev: memory backend linked with device + # ++# @node: the numa node ++# + # Since: 6.2 + ## + { 'struct': 'SgxEPC', +- 'data': { 'memdev': 'str' } } ++ 'data': { 'memdev': 'str', ++ 'node': 'int' ++ } ++} + + ## + # @SgxEPCProperties: +diff --git a/qemu-options.hx b/qemu-options.hx +index 94c4a8dbaf..4b7798088b 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -127,11 +127,11 @@ SRST + ERST + + DEF("M", HAS_ARG, QEMU_OPTION_M, +- " sgx-epc.0.memdev=memid\n", ++ " sgx-epc.0.memdev=memid,sgx-epc.0.node=numaid\n", + QEMU_ARCH_ALL) + + SRST +-``sgx-epc.0.memdev=@var{memid}`` ++``sgx-epc.0.memdev=@var{memid},sgx-epc.0.node=@var{numaid}`` + Define an SGX EPC section. 
+ ERST + +-- +2.27.0 + diff --git a/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch b/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch new file mode 100644 index 0000000..659dc22 --- /dev/null +++ b/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch @@ -0,0 +1,210 @@ +From ea46a86ba6319ea98573c65af5186cd5399ab0ce Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:07 -0400 +Subject: [PATCH 2/7] numa: Support SGX numa in the monitor and Libvirt + interfaces + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [2/5] 403c4f98dccd023293cd3246081ae12f4782bed0 +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +Add the SGXEPCSection list into SGXInfo to show the multiple +SGX EPC sections detailed info, not the total size like before. +This patch can enable numa support for 'info sgx' command and +QMP interfaces. The new interfaces show each EPC section info +in one numa node. Libvirt can use QMP interface to get the +detailed host SGX EPC capabilities to decide how to allocate +host EPC sections to guest. 
+ +(qemu) info sgx + SGX support: enabled + SGX1 support: enabled + SGX2 support: enabled + FLC support: enabled + NUMA node #0: size=67108864 + NUMA node #1: size=29360128 + +The QMP interface show: +(QEMU) query-sgx +{"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \ +[{"node": 0, "size": 67108864}, {"node": 1, "size": 29360128}], "flc": true}} + +(QEMU) query-sgx-capabilities +{"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \ +[{"node": 0, "size": 17070817280}, {"node": 1, "size": 17079205888}], "flc": true}} + +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-4-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4755927ae12547c2e7cb22c5fa1b39038c6c11b1) +Signed-off-by: Paul Lai +--- + hw/i386/sgx.c | 51 +++++++++++++++++++++++++++++++++++-------- + qapi/misc-target.json | 19 ++++++++++++++-- + 2 files changed, 59 insertions(+), 11 deletions(-) + +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index d04299904a..5de5dd0893 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -83,11 +83,13 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + ((high & MAKE_64BIT_MASK(0, 20)) << 32); + } + +-static uint64_t sgx_calc_host_epc_section_size(void) ++static SGXEPCSectionList *sgx_calc_host_epc_sections(void) + { ++ SGXEPCSectionList *head = NULL, **tail = &head; ++ SGXEPCSection *section; + uint32_t i, type; + uint32_t eax, ebx, ecx, edx; +- uint64_t size = 0; ++ uint32_t j = 0; + + for (i = 0; i < SGX_MAX_EPC_SECTIONS; i++) { + host_cpuid(0x12, i + 2, &eax, &ebx, &ecx, &edx); +@@ -101,10 +103,13 @@ static uint64_t sgx_calc_host_epc_section_size(void) + break; + } + +- size += sgx_calc_section_metric(ecx, edx); ++ section = g_new0(SGXEPCSection, 1); ++ section->node = j++; ++ section->size = sgx_calc_section_metric(ecx, edx); ++ QAPI_LIST_APPEND(tail, section); + } + +- return size; ++ return head; + } + + static void sgx_epc_reset(void *opaque) +@@ -168,13 +173,35 @@ SGXInfo 
*qmp_query_sgx_capabilities(Error **errp) + info->sgx1 = eax & (1U << 0) ? true : false; + info->sgx2 = eax & (1U << 1) ? true : false; + +- info->section_size = sgx_calc_host_epc_section_size(); ++ info->sections = sgx_calc_host_epc_sections(); + + close(fd); + + return info; + } + ++static SGXEPCSectionList *sgx_get_epc_sections_list(void) ++{ ++ GSList *device_list = sgx_epc_get_device_list(); ++ SGXEPCSectionList *head = NULL, **tail = &head; ++ SGXEPCSection *section; ++ ++ for (; device_list; device_list = device_list->next) { ++ DeviceState *dev = device_list->data; ++ Object *obj = OBJECT(dev); ++ ++ section = g_new0(SGXEPCSection, 1); ++ section->node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, ++ &error_abort); ++ section->size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, ++ &error_abort); ++ QAPI_LIST_APPEND(tail, section); ++ } ++ g_slist_free(device_list); ++ ++ return head; ++} ++ + SGXInfo *qmp_query_sgx(Error **errp) + { + SGXInfo *info = NULL; +@@ -193,14 +220,13 @@ SGXInfo *qmp_query_sgx(Error **errp) + return NULL; + } + +- SGXEPCState *sgx_epc = &pcms->sgx_epc; + info = g_new0(SGXInfo, 1); + + info->sgx = true; + info->sgx1 = true; + info->sgx2 = true; + info->flc = true; +- info->section_size = sgx_epc->size; ++ info->sections = sgx_get_epc_sections_list(); + + return info; + } +@@ -208,6 +234,7 @@ SGXInfo *qmp_query_sgx(Error **errp) + void hmp_info_sgx(Monitor *mon, const QDict *qdict) + { + Error *err = NULL; ++ SGXEPCSectionList *section_list, *section; + g_autoptr(SGXInfo) info = qmp_query_sgx(&err); + + if (err) { +@@ -222,8 +249,14 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) + info->sgx2 ? "enabled" : "disabled"); + monitor_printf(mon, "FLC support: %s\n", + info->flc ? 
"enabled" : "disabled"); +- monitor_printf(mon, "size: %" PRIu64 "\n", +- info->section_size); ++ ++ section_list = info->sections; ++ for (section = section_list; section; section = section->next) { ++ monitor_printf(mon, "NUMA node #%" PRId64 ": ", ++ section->value->node); ++ monitor_printf(mon, "size=%" PRIu64 "\n", ++ section->value->size); ++ } + } + + bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size) +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 5aa2b95b7d..1022aa0184 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -337,6 +337,21 @@ + 'if': 'TARGET_ARM' } + + ++## ++# @SGXEPCSection: ++# ++# Information about intel SGX EPC section info ++# ++# @node: the numa node ++# ++# @size: the size of epc section ++# ++# Since: 6.2 ++## ++{ 'struct': 'SGXEPCSection', ++ 'data': { 'node': 'int', ++ 'size': 'uint64'}} ++ + ## + # @SGXInfo: + # +@@ -350,7 +365,7 @@ + # + # @flc: true if FLC is supported + # +-# @section-size: The EPC section size for guest ++# @sections: The EPC sections info for guest + # + # Since: 6.2 + ## +@@ -359,7 +374,7 @@ + 'sgx1': 'bool', + 'sgx2': 'bool', + 'flc': 'bool', +- 'section-size': 'uint64'}, ++ 'sections': ['SGXEPCSection']}, + 'if': 'TARGET_I386' } + + ## +-- +2.27.0 + diff --git a/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch b/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch new file mode 100644 index 0000000..b212194 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch @@ -0,0 +1,180 @@ +From 2e38b4ec5c53b2b98539a70105d3046e1c452ab8 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 13/17] pc-bios/s390-ccw: Split virtio-scsi code from + virtio_blk_setup_device() + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [8/10] 
f49c5fb77e05c9dc09ed9f037e37f6a461e4bba6 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit cf30b7c4a9b2c64518be8037c2e6670aacdb00b9 +Author: Thomas Huth +Date: Mon Jul 4 13:19:00 2022 +0200 + + pc-bios/s390-ccw: Split virtio-scsi code from virtio_blk_setup_device() + + The next patch is going to add more virtio-block specific code to + virtio_blk_setup_device(), and if the virtio-scsi code is also in + there, this is more cumbersome. And the calling function virtio_setup() + in main.c looks at the device type already anyway, so it's more + logical to separate the virtio-scsi stuff into a new function in + virtio-scsi.c instead. + + Message-Id: <20220704111903.62400-10-thuth@redhat.com> + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/main.c | 24 +++++++++++++++++------- + pc-bios/s390-ccw/virtio-blkdev.c | 20 ++------------------ + pc-bios/s390-ccw/virtio-scsi.c | 19 ++++++++++++++++++- + pc-bios/s390-ccw/virtio-scsi.h | 2 +- + 4 files changed, 38 insertions(+), 27 deletions(-) + +diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c +index 5d2b7ba94d..13e1d8fdf7 100644 +--- a/pc-bios/s390-ccw/main.c ++++ b/pc-bios/s390-ccw/main.c +@@ -14,6 +14,7 @@ + #include "s390-ccw.h" + #include "cio.h" + #include "virtio.h" ++#include "virtio-scsi.h" + #include "dasd-ipl.h" + + char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE))); +@@ -218,6 +219,7 @@ static int virtio_setup(void) + { + VDev *vdev = virtio_get_device(); + QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS; ++ int ret; + + memcpy(&qipl, early_qipl, sizeof(QemuIplParameters)); + +@@ -225,18 +227,26 @@ static int virtio_setup(void) + menu_setup(); + } + +- if (virtio_get_device_type() == VIRTIO_ID_NET) { ++ switch (vdev->senseid.cu_model) { ++ case VIRTIO_ID_NET: + 
sclp_print("Network boot device detected\n"); + vdev->netboot_start_addr = qipl.netboot_start_addr; +- } else { +- int ret = virtio_blk_setup_device(blk_schid); +- if (ret) { +- return ret; +- } ++ return 0; ++ case VIRTIO_ID_BLOCK: ++ ret = virtio_blk_setup_device(blk_schid); ++ break; ++ case VIRTIO_ID_SCSI: ++ ret = virtio_scsi_setup_device(blk_schid); ++ break; ++ default: ++ panic("\n! No IPL device available !\n"); ++ } ++ ++ if (!ret) { + IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); + } + +- return 0; ++ return ret; + } + + static void ipl_boot_device(void) +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index db1f7f44aa..c175b66a47 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -222,27 +222,11 @@ uint64_t virtio_get_blocks(void) + int virtio_blk_setup_device(SubChannelId schid) + { + VDev *vdev = virtio_get_device(); +- int ret = 0; + + vdev->schid = schid; + virtio_setup_ccw(vdev); + +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- sclp_print("Using virtio-blk.\n"); +- break; +- case VIRTIO_ID_SCSI: +- IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, +- "Config: sense size mismatch"); +- IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE, +- "Config: CDB size mismatch"); ++ sclp_print("Using virtio-blk.\n"); + +- sclp_print("Using virtio-scsi.\n"); +- ret = virtio_scsi_setup(vdev); +- break; +- default: +- panic("\n! 
No IPL device available !\n"); +- } +- +- return ret; ++ return 0; + } +diff --git a/pc-bios/s390-ccw/virtio-scsi.c b/pc-bios/s390-ccw/virtio-scsi.c +index 2c8d0f3097..3b7069270c 100644 +--- a/pc-bios/s390-ccw/virtio-scsi.c ++++ b/pc-bios/s390-ccw/virtio-scsi.c +@@ -329,7 +329,7 @@ static void scsi_parse_capacity_report(void *data, + } + } + +-int virtio_scsi_setup(VDev *vdev) ++static int virtio_scsi_setup(VDev *vdev) + { + int retry_test_unit_ready = 3; + uint8_t data[256]; +@@ -430,3 +430,20 @@ int virtio_scsi_setup(VDev *vdev) + + return 0; + } ++ ++int virtio_scsi_setup_device(SubChannelId schid) ++{ ++ VDev *vdev = virtio_get_device(); ++ ++ vdev->schid = schid; ++ virtio_setup_ccw(vdev); ++ ++ IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, ++ "Config: sense size mismatch"); ++ IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE, ++ "Config: CDB size mismatch"); ++ ++ sclp_print("Using virtio-scsi.\n"); ++ ++ return virtio_scsi_setup(vdev); ++} +diff --git a/pc-bios/s390-ccw/virtio-scsi.h b/pc-bios/s390-ccw/virtio-scsi.h +index 4b14c2c2f9..e6b6cd4815 100644 +--- a/pc-bios/s390-ccw/virtio-scsi.h ++++ b/pc-bios/s390-ccw/virtio-scsi.h +@@ -67,8 +67,8 @@ static inline bool virtio_scsi_response_ok(const VirtioScsiCmdResp *r) + return r->response == VIRTIO_SCSI_S_OK && r->status == CDB_STATUS_GOOD; + } + +-int virtio_scsi_setup(VDev *vdev); + int virtio_scsi_read_many(VDev *vdev, + ulong sector, void *load_addr, int sec_num); ++int virtio_scsi_setup_device(SubChannelId schid); + + #endif /* VIRTIO_SCSI_H */ +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch b/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch new file mode 100644 index 0000000..231a8a0 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch @@ -0,0 +1,102 @@ +From 64fa56e0520215e3909e442f09d8073c1870648a Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 
+Subject: [PATCH 07/17] pc-bios/s390-ccw/bootmap: Improve the guessing logic in + zipl_load_vblk() + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [2/10] ca8f5e847617cf4ac2fd6c38edb2982f32fa3eba (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 422865f6672ee1482b98d18321b55c1ecfb06c82 +Author: Thomas Huth +Date: Mon Jul 4 13:18:54 2022 +0200 + + pc-bios/s390-ccw/bootmap: Improve the guessing logic in zipl_load_vblk() + + The logic of trying a final ISO or ECKD boot on virtio-block devices is + very weird: Since the geometry hardly ever matches in virtio_disk_is_scsi(), + virtio_blk_setup_device() always sets a "guessed" disk geometry via + virtio_assume_scsi() (which is certainly also wrong in a lot of cases). + + zipl_load_vblk() then sees that there's been a "virtio_guessed_disk_nature" + and tries to fix up the geometry again via virtio_assume_iso9660() before + always trying to do ipl_iso_el_torito(). That's a very brain-twisting + way of attempting to boot from ISO images, which won't work anymore after + the following patches that will clean up the virtio_assume_scsi() mess + (and thus get rid of the "virtio_guessed_disk_nature" here). + + Let's try a better approach instead: ISO files always have a magic + string "CD001" at offset 0x8001 (see e.g. the ECMA-119 specification) + which we can use to decide whether we should try to boot in ISO 9660 + mode (which we should also try if we see a sector size of 2048). + + And if we were not able to boot in ISO mode here, the final boot attempt + before panicking is to boot in ECKD mode.
Since this is our last boot + attempt anyway, simply always assume the ECKD geometry here (if the sector + size was not 4096 yet), so that we also do not depend on the guessed disk + geometry from virtio_blk_setup_device() here anymore. + + Message-Id: <20220704111903.62400-4-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/bootmap.c | 27 +++++++++++++++++++++++---- + 1 file changed, 23 insertions(+), 4 deletions(-) + +diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c +index 56411ab3b6..994e59c0b0 100644 +--- a/pc-bios/s390-ccw/bootmap.c ++++ b/pc-bios/s390-ccw/bootmap.c +@@ -780,18 +780,37 @@ static void ipl_iso_el_torito(void) + } + } + ++/** ++ * Detect whether we're trying to boot from an .ISO image. ++ * These always have a signature string "CD001" at offset 0x8001. ++ */ ++static bool has_iso_signature(void) ++{ ++ int blksize = virtio_get_block_size(); ++ ++ if (!blksize || virtio_read(0x8000 / blksize, sec)) { ++ return false; ++ } ++ ++ return !memcmp("CD001", &sec[1], 5); ++} ++ + /*********************************************************************** + * Bus specific IPL sequences + */ + + static void zipl_load_vblk(void) + { +- if (virtio_guessed_disk_nature()) { +- virtio_assume_iso9660(); ++ int blksize = virtio_get_block_size(); ++ ++ if (blksize == VIRTIO_ISO_BLOCK_SIZE || has_iso_signature()) { ++ if (blksize != VIRTIO_ISO_BLOCK_SIZE) { ++ virtio_assume_iso9660(); ++ } ++ ipl_iso_el_torito(); + } +- ipl_iso_el_torito(); + +- if (virtio_guessed_disk_nature()) { ++ if (blksize != VIRTIO_DASD_DEFAULT_BLOCK_SIZE) { + sclp_print("Using guessed DASD geometry.\n"); + virtio_assume_eckd(); + } +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch b/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch new file mode 100644 index 0000000..00601aa --- /dev/null +++ b/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch @@ -0,0 +1,78 
@@ +From 56674ee1f25f12978a6a8a1390e11b55b3e0fabe Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 15/17] pc-bios/s390-ccw/netboot.mak: Ignore Clang's warnings + about GNU extensions + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [10/10] 037dab4df23ebb2b42871bca8c842a53a7204b50 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit e2269220acb03e6c6a460c3090d804835e202239 +Author: Thomas Huth +Date: Mon Jul 4 13:19:03 2022 +0200 + + pc-bios/s390-ccw/netboot.mak: Ignore Clang's warnings about GNU extensions + + When compiling the s390-ccw bios with Clang (v14.0), there is currently + an unhelpful warning like this: + + CC pc-bios/s390-ccw/ipv6.o + ../../roms/SLOF/lib/libnet/ipv6.c:447:18: warning: variable length array + folded to constant array as an extension [-Wgnu-folding-constant] + unsigned short raw[ip6size]; + ^ + + SLOF is currently GCC-only and cannot be compiled with Clang yet, so + it is expected that such extensions sneak in there - and as long as + we don't want to compile the code with a compiler that is neither GCC + nor Clang, it is also not necessary to avoid such extensions. + + Thus these GNU-extension related warnings are completely useless in + the s390-ccw bios, especially in the code that is coming from SLOF, + so we should simply disable the related warnings here now.
+ + Message-Id: <20220704111903.62400-13-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/netboot.mak | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/pc-bios/s390-ccw/netboot.mak b/pc-bios/s390-ccw/netboot.mak +index 68b4d7edcb..ad41898cb6 100644 +--- a/pc-bios/s390-ccw/netboot.mak ++++ b/pc-bios/s390-ccw/netboot.mak +@@ -16,9 +16,12 @@ s390-netboot.elf: $(NETOBJS) libnet.a libc.a + s390-netboot.img: s390-netboot.elf + $(call quiet-command,$(STRIP) --strip-unneeded $< -o $@,"STRIP","$(TARGET_DIR)$@") + ++# SLOF is GCC-only, so ignore warnings about GNU extensions with Clang here ++NO_GNU_WARN := $(call cc-option,-Werror $(QEMU_CFLAGS),-Wno-gnu) ++ + # libc files: + +-LIBC_CFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(LIBC_INC) $(LIBNET_INC) \ ++LIBC_CFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(NO_GNU_WARN) $(LIBC_INC) $(LIBNET_INC) \ + -MMD -MP -MT $@ -MF $(@:%.o=%.d) + + CTYPE_OBJS = isdigit.o isxdigit.o toupper.o +@@ -52,7 +55,7 @@ libc.a: $(LIBCOBJS) + + LIBNETOBJS := args.o dhcp.o dns.o icmpv6.o ipv6.o tcp.o udp.o bootp.o \ + dhcpv6.o ethernet.o ipv4.o ndp.o tftp.o pxelinux.o +-LIBNETCFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(LIBC_INC) $(LIBNET_INC) \ ++LIBNETCFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(NO_GNU_WARN) $(LIBC_INC) $(LIBNET_INC) \ + -DDHCPARCH=0x1F -MMD -MP -MT $@ -MF $(@:%.o=%.d) + + %.o : $(SLOF_DIR)/lib/libnet/%.c +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch b/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch new file mode 100644 index 0000000..5e4b689 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch @@ -0,0 +1,56 @@ +From 430e76fd964390db86c8486f76b916a1cf7f74c2 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 12/17] pc-bios/s390-ccw/virtio: Beautify the code for reading + virtqueue configuration + +RH-Author: Thomas Huth +RH-MergeRequest: 106: 
pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [7/10] b15c06b4c5431837672b6cb5d57d09da20718441 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 070824885741f5d2a66626d3c4ecb2773c8e0552 +Author: Thomas Huth +Date: Mon Jul 4 13:18:59 2022 +0200 + + pc-bios/s390-ccw/virtio: Beautify the code for reading virtqueue configuration + + It looks nicer if we separate the run_ccw() from the IPL_assert() + statement, and the error message should talk about "virtio device" + instead of "block device", since this code is nowadays used for + non-block (i.e. network) devices, too. + + Message-Id: <20220704111903.62400-9-thuth@redhat.com> + Reviewed-by: Cornelia Huck + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c +index d8c2b52710..f37510f312 100644 +--- a/pc-bios/s390-ccw/virtio.c ++++ b/pc-bios/s390-ccw/virtio.c +@@ -289,9 +289,8 @@ void virtio_setup_ccw(VDev *vdev) + .num = 0, + }; + +- IPL_assert( +- run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false) == 0, +- "Could not get block device VQ configuration"); ++ rc = run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false); ++ IPL_assert(rc == 0, "Could not get virtio device VQ configuration"); + info.num = config.num; + vring_init(&vdev->vrings[i], &info); + vdev->vrings[i].schid = vdev->schid; +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch b/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch new file mode 100644 index 0000000..04ab605 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch @@ -0,0 +1,63 @@ +From 
7d4f2454f95bfc087ad3f2fe3bc4625dcea3568e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 06/17] pc-bios/s390-ccw/virtio: Introduce a macro for the DASD + block size + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [1/10] 71033934e1e9988bcf71362e02665ceb7449009d (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 1f2c2ee48e87ea743f8e23cc7569dd26c4cf9623 +Author: Thomas Huth +Date: Mon Jul 4 13:18:53 2022 +0200 + + pc-bios/s390-ccw/virtio: Introduce a macro for the DASD block size + + Use VIRTIO_DASD_DEFAULT_BLOCK_SIZE instead of the magic value 4096. + + Message-Id: <20220704111903.62400-3-thuth@redhat.com> + Reviewed-by: Eric Farman + Reviewed-by: Cornelia Huck + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 2 +- + pc-bios/s390-ccw/virtio.h | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 7d35050292..6483307630 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -155,7 +155,7 @@ void virtio_assume_eckd(void) + vdev->config.blk.physical_block_exp = 0; + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_BLOCK: +- vdev->config.blk.blk_size = 4096; ++ vdev->config.blk.blk_size = VIRTIO_DASD_DEFAULT_BLOCK_SIZE; + break; + case VIRTIO_ID_SCSI: + vdev->config.blk.blk_size = vdev->scsi_block_size; +diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h +index 19fceb6495..9e410bde6f 100644 +--- a/pc-bios/s390-ccw/virtio.h ++++ b/pc-bios/s390-ccw/virtio.h +@@ -198,6 +198,7 @@ extern int virtio_read_many(ulong sector, void *load_addr, int sec_num); + #define VIRTIO_SECTOR_SIZE 512 
+ #define VIRTIO_ISO_BLOCK_SIZE 2048 + #define VIRTIO_SCSI_BLOCK_SIZE 512 ++#define VIRTIO_DASD_DEFAULT_BLOCK_SIZE 4096 + + static inline ulong virtio_sector_adjust(ulong sector) + { +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch b/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch new file mode 100644 index 0000000..41ae538 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch @@ -0,0 +1,67 @@ +From 20f8724d0837acbe642c8c7698a4b256f34c1209 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 11/17] pc-bios/s390-ccw/virtio: Read device config after + feature negotiation + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [6/10] 54d21e430b2dfba9e0a0823d6bb8ec7e7f8ff2ff (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit aa5c69ce99411c4886bcd051f288afc02b6d968d +Author: Thomas Huth +Date: Mon Jul 4 13:18:58 2022 +0200 + + pc-bios/s390-ccw/virtio: Read device config after feature negotiation + + Feature negotiation should be done first, since some fields in the + config area can depend on the negotiated features and thus should + rather be read afterwards. + + While we're at it, also adjust the error message here a little bit + (the code is nowadays used for non-block virtio devices, too). 
+ + Message-Id: <20220704111903.62400-8-thuth@redhat.com> + Reviewed-by: Eric Farman + Reviewed-by: Cornelia Huck + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c +index 4e85a2eb82..d8c2b52710 100644 +--- a/pc-bios/s390-ccw/virtio.c ++++ b/pc-bios/s390-ccw/virtio.c +@@ -262,10 +262,6 @@ void virtio_setup_ccw(VDev *vdev) + rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); + IPL_assert(rc == 0, "Could not write DRIVER status to host"); + +- IPL_assert( +- run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0, +- "Could not get block device configuration"); +- + /* Feature negotiation */ + for (i = 0; i < ARRAY_SIZE(vdev->guest_features); i++) { + feats.features = 0; +@@ -278,6 +274,9 @@ void virtio_setup_ccw(VDev *vdev) + IPL_assert(rc == 0, "Could not set features bits"); + } + ++ rc = run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false); ++ IPL_assert(rc == 0, "Could not get virtio device configuration"); ++ + for (i = 0; i < vdev->nr_vqs; i++) { + VqInfo info = { + .queue = (unsigned long long) ring_area + (i * VIRTIO_RING_SIZE), +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch b/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch new file mode 100644 index 0000000..e976047 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch @@ -0,0 +1,93 @@ +From 303fb3ddcdbbd1373c5b1aa28e03f90507e217f3 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 10/17] pc-bios/s390-ccw/virtio: Set missing status bits while + initializing + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [5/10] 
4bc44d9adae055fb60b79d04a2f08535b4d38d2b (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 175aa06a152ef6b58ba9b2e47a1296b024dea70c +Author: Thomas Huth +Date: Mon Jul 4 13:18:57 2022 +0200 + + pc-bios/s390-ccw/virtio: Set missing status bits while initializing + + According to chapter "3.1.1 Driver Requirements: Device Initialization" + of the Virtio specification (v1.1), a driver for a device has to set + the ACKNOWLEDGE and DRIVER bits in the status field after resetting + the device. The s390-ccw bios skipped these steps so far and it seems + like QEMU never cared. Anyway, it's better to follow the spec, so + let's set these bits now in the right spots, too. + + Message-Id: <20220704111903.62400-7-thuth@redhat.com> + Acked-by: Christian Borntraeger + Reviewed-by: Cornelia Huck + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c +index 5d2c6e3381..4e85a2eb82 100644 +--- a/pc-bios/s390-ccw/virtio.c ++++ b/pc-bios/s390-ccw/virtio.c +@@ -220,7 +220,7 @@ int virtio_run(VDev *vdev, int vqid, VirtioCmd *cmd) + void virtio_setup_ccw(VDev *vdev) + { + int i, rc, cfg_size = 0; +- unsigned char status = VIRTIO_CONFIG_S_DRIVER_OK; ++ uint8_t status; + struct VirtioFeatureDesc { + uint32_t features; + uint8_t index; +@@ -234,6 +234,10 @@ void virtio_setup_ccw(VDev *vdev) + + run_ccw(vdev, CCW_CMD_VDEV_RESET, NULL, 0, false); + ++ status = VIRTIO_CONFIG_S_ACKNOWLEDGE; ++ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); ++ IPL_assert(rc == 0, "Could not write ACKNOWLEDGE status to host"); ++ + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_NET: + vdev->nr_vqs = 2; +@@ -253,6 +257,11 @@ void
virtio_setup_ccw(VDev *vdev) + default: + panic("Unsupported virtio device\n"); + } ++ ++ status |= VIRTIO_CONFIG_S_DRIVER; ++ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); ++ IPL_assert(rc == 0, "Could not write DRIVER status to host"); ++ + IPL_assert( + run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0, + "Could not get block device configuration"); +@@ -291,9 +300,10 @@ void virtio_setup_ccw(VDev *vdev) + run_ccw(vdev, CCW_CMD_SET_VQ, &info, sizeof(info), false) == 0, + "Cannot set VQ info"); + } +- IPL_assert( +- run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false) == 0, +- "Could not write status to host"); ++ ++ status |= VIRTIO_CONFIG_S_DRIVER_OK; ++ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); ++ IPL_assert(rc == 0, "Could not write DRIVER_OK status to host"); + } + + bool virtio_is_supported(SubChannelId schid) +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch new file mode 100644 index 0000000..109b98e --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch @@ -0,0 +1,101 @@ +From d3335a98a7b6e084aadf4907968536a67cf8e64c Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 09/17] pc-bios/s390-ccw/virtio-blkdev: Remove + virtio_assume_scsi() + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [4/10] bf27f75344f220a03475a2918ed49ec9cd5ba317 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 5447de2619050a0a4dd480b97f88a9b58da360d1 +Author: Thomas Huth +Date: Mon Jul 4 13:18:56 2022 +0200 + + pc-bios/s390-ccw/virtio-blkdev: Remove 
virtio_assume_scsi() + + The virtio_assume_scsi() function is very questionable: First, it + is only called for virtio-blk, and not for virtio-scsi, so the naming + is already quite confusing. Second, it is called if we detected an + "invalid" IPL disk, trying to fix it by blindly setting a sector + size of 512. This of course won't work in most cases since disks + might have a different sector size for a reason. + + Thus let's remove this strange function now. The calling code can + also be removed completely, since there is another spot in main.c + that does "IPL_assert(virtio_ipl_disk_is_valid(), ...)" to make + sure that we do not try to IPL from an invalid device. + + Message-Id: <20220704111903.62400-6-thuth@redhat.com> + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 24 ------------------------ + pc-bios/s390-ccw/virtio.h | 1 - + 2 files changed, 25 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 7e13155589..db1f7f44aa 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -112,23 +112,6 @@ VirtioGDN virtio_guessed_disk_nature(void) + return virtio_get_device()->guessed_disk_nature; + } + +-void virtio_assume_scsi(void) +-{ +- VDev *vdev = virtio_get_device(); +- +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- vdev->guessed_disk_nature = VIRTIO_GDN_SCSI; +- vdev->config.blk.blk_size = VIRTIO_SCSI_BLOCK_SIZE; +- vdev->config.blk.physical_block_exp = 0; +- vdev->blk_factor = 1; +- break; +- case VIRTIO_ID_SCSI: +- vdev->scsi_block_size = VIRTIO_SCSI_BLOCK_SIZE; +- break; +- } +-} +- + void virtio_assume_iso9660(void) + { + VDev *vdev = virtio_get_device(); +@@ -247,13 +230,6 @@ int virtio_blk_setup_device(SubChannelId schid) + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_BLOCK: + sclp_print("Using virtio-blk.\n"); +- if (!virtio_ipl_disk_is_valid()) { +- /* make sure all
getters but blocksize return 0 for +- * invalid IPL disk +- */ +- memset(&vdev->config.blk, 0, sizeof(vdev->config.blk)); +- virtio_assume_scsi(); +- } + break; + case VIRTIO_ID_SCSI: + IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, +diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h +index 241730effe..600ba5052b 100644 +--- a/pc-bios/s390-ccw/virtio.h ++++ b/pc-bios/s390-ccw/virtio.h +@@ -182,7 +182,6 @@ enum guessed_disk_nature_type { + typedef enum guessed_disk_nature_type VirtioGDN; + + VirtioGDN virtio_guessed_disk_nature(void); +-void virtio_assume_scsi(void); + void virtio_assume_eckd(void); + void virtio_assume_iso9660(void); + +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch new file mode 100644 index 0000000..8bc7a11 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch @@ -0,0 +1,63 @@ +From db58915fcaf3d24b64fe2c34cc15b5596b9a81bb Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 14/17] pc-bios/s390-ccw/virtio-blkdev: Request the right + feature bits + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [9/10] 9dcd8c2f659f366f9487ab6473d1f0d7778b40a7 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 9125a314cca4a1838b09305a87d8efb98f80ab67 +Author: Thomas Huth +Date: Mon Jul 4 13:19:01 2022 +0200 + + pc-bios/s390-ccw/virtio-blkdev: Request the right feature bits + + The virtio-blk code uses the block size and geometry fields in the + config area. According to the virtio-spec, these have to be negotiated + with the right feature bits during initialization, otherwise they + might not be available. 
QEMU is so far very forgiving and always + provides them, but we should not rely on this behavior, so let's + better request them properly via the VIRTIO_BLK_F_GEOMETRY and + VIRTIO_BLK_F_BLK_SIZE feature bits. + + Message-Id: <20220704111903.62400-11-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index c175b66a47..8271c47296 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -13,6 +13,9 @@ + #include "virtio.h" + #include "virtio-scsi.h" + ++#define VIRTIO_BLK_F_GEOMETRY (1 << 4) ++#define VIRTIO_BLK_F_BLK_SIZE (1 << 6) ++ + static int virtio_blk_read_many(VDev *vdev, ulong sector, void *load_addr, + int sec_num) + { +@@ -223,6 +226,7 @@ int virtio_blk_setup_device(SubChannelId schid) + { + VDev *vdev = virtio_get_device(); + ++ vdev->guest_features[0] = VIRTIO_BLK_F_GEOMETRY | VIRTIO_BLK_F_BLK_SIZE; + vdev->schid = schid; + virtio_setup_ccw(vdev); + +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch new file mode 100644 index 0000000..818e515 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch @@ -0,0 +1,124 @@ +From f07e4629a7c58407f903810a038660c88c6a6315 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 08/17] pc-bios/s390-ccw/virtio-blkdev: Simplify/fix + virtio_ipl_disk_is_valid() + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [3/10] fb06830a3e50d9da3d84913b50bb227865cc44b3 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: 
http://bugzilla.redhat.com/2098077 + +commit bbf615f7b707f009ef8e757d170902ad33b90644 +Author: Thomas Huth +Date: Mon Jul 4 13:18:55 2022 +0200 + + pc-bios/s390-ccw/virtio-blkdev: Simplify/fix virtio_ipl_disk_is_valid() + + The s390-ccw bios fails to boot if the boot disk is a virtio-blk + disk with a sector size of 4096. For example: + + dasdfmt -b 4096 -d cdl -y -p -M quick /dev/dasdX + fdasd -a /dev/dasdX + install a guest onto /dev/dasdX1 using virtio-blk + qemu-system-s390x -nographic -hda /dev/dasdX1 + + The bios then bails out with: + + ! Cannot read block 0 ! + + Looking at virtio_ipl_disk_is_valid() and especially the function + virtio_disk_is_scsi(), it does not really make sense that we expect + only such a limited disk geometry (like a block size of 512) for + our boot disks. Let's relax the check and allow everything that + remotely looks like a sane disk. + + Message-Id: <20220704111903.62400-5-thuth@redhat.com> + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 41 ++++++-------------------------- + pc-bios/s390-ccw/virtio.h | 2 -- + 2 files changed, 7 insertions(+), 36 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 6483307630..7e13155589 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -166,46 +166,19 @@ void virtio_assume_eckd(void) + virtio_eckd_sectors_for_block_size(vdev->config.blk.blk_size); + } + +-bool virtio_disk_is_scsi(void) +-{ +- VDev *vdev = virtio_get_device(); +- +- if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI) { +- return true; +- } +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- return (vdev->config.blk.geometry.heads == 255) +- && (vdev->config.blk.geometry.sectors == 63) +- && (virtio_get_block_size() == VIRTIO_SCSI_BLOCK_SIZE); +- case VIRTIO_ID_SCSI: +- return true; +- } +- return false; +-} +- +-bool virtio_disk_is_eckd(void) ++bool 
virtio_ipl_disk_is_valid(void) + { ++ int blksize = virtio_get_block_size(); + VDev *vdev = virtio_get_device(); +- const int block_size = virtio_get_block_size(); + +- if (vdev->guessed_disk_nature == VIRTIO_GDN_DASD) { ++ if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI || ++ vdev->guessed_disk_nature == VIRTIO_GDN_DASD) { + return true; + } +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- return (vdev->config.blk.geometry.heads == 15) +- && (vdev->config.blk.geometry.sectors == +- virtio_eckd_sectors_for_block_size(block_size)); +- case VIRTIO_ID_SCSI: +- return false; +- } +- return false; +-} + +-bool virtio_ipl_disk_is_valid(void) +-{ +- return virtio_disk_is_scsi() || virtio_disk_is_eckd(); ++ return (vdev->senseid.cu_model == VIRTIO_ID_BLOCK || ++ vdev->senseid.cu_model == VIRTIO_ID_SCSI) && ++ blksize >= 512 && blksize <= 4096; + } + + int virtio_get_block_size(void) +diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h +index 9e410bde6f..241730effe 100644 +--- a/pc-bios/s390-ccw/virtio.h ++++ b/pc-bios/s390-ccw/virtio.h +@@ -186,8 +186,6 @@ void virtio_assume_scsi(void); + void virtio_assume_eckd(void); + void virtio_assume_iso9660(void); + +-extern bool virtio_disk_is_scsi(void); +-extern bool virtio_disk_is_eckd(void); + extern bool virtio_ipl_disk_is_valid(void); + extern int virtio_get_block_size(void); + extern uint8_t virtio_get_heads(void); +-- +2.31.1 + diff --git a/kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch b/kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch new file mode 100644 index 0000000..817f0ab --- /dev/null +++ b/kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch @@ -0,0 +1,83 @@ +From 7998e8aa78caa35c2ab2da44f9e29e21d7548c61 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 23 Mar 2022 13:21:40 -0400 +Subject: [PATCH 05/18] pci: expose TYPE_XIO3130_DOWNSTREAM name + +RH-Author: Jon Maloy +RH-MergeRequest: 134: pci: expose TYPE_XIO3130_DOWNSTREAM name +RH-Commit: [1/2] 
f09ddcaf686f22b545bf269f87787ebfc33fccda (jmaloy/qemu-kvm) +RH-Bugzilla: 2062610 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Gerd Hoffmann + +BZ: https://bugzilla.redhat.com/2062610 +UPSTREAM: merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038138 + +commit c41481af9a5d0d463607cc45b45c510875570817 +Author: Igor Mammedov +Date: Tue Mar 1 10:11:58 2022 -0500 + + pci: expose TYPE_XIO3130_DOWNSTREAM name + + Type name will be used in followup patch for cast check + in pcihp code. + + Signed-off-by: Igor Mammedov + Message-Id: <20220301151200.3507298-2-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit c41481af9a5d0d463607cc45b45c510875570817) +Signed-off-by: Jon Maloy +--- + hw/pci-bridge/xio3130_downstream.c | 3 ++- + include/hw/pci-bridge/xio3130_downstream.h | 15 +++++++++++++++ + 2 files changed, 17 insertions(+), 1 deletion(-) + create mode 100644 include/hw/pci-bridge/xio3130_downstream.h + +diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c +index 04aae72cd6..b17cafd359 100644 +--- a/hw/pci-bridge/xio3130_downstream.c ++++ b/hw/pci-bridge/xio3130_downstream.c +@@ -28,6 +28,7 @@ + #include "migration/vmstate.h" + #include "qapi/error.h" + #include "qemu/module.h" ++#include "hw/pci-bridge/xio3130_downstream.h" + + #define PCI_DEVICE_ID_TI_XIO3130D 0x8233 /* downstream port */ + #define XIO3130_REVISION 0x1 +@@ -173,7 +174,7 @@ static void xio3130_downstream_class_init(ObjectClass *klass, void *data) + } + + static const TypeInfo xio3130_downstream_info = { +- .name = "xio3130-downstream", ++ .name = TYPE_XIO3130_DOWNSTREAM, + .parent = TYPE_PCIE_SLOT, + .class_init = xio3130_downstream_class_init, + .interfaces = (InterfaceInfo[]) { +diff --git a/include/hw/pci-bridge/xio3130_downstream.h b/include/hw/pci-bridge/xio3130_downstream.h +new file mode 100644 +index 0000000000..1d10139aea +--- /dev/null ++++ 
b/include/hw/pci-bridge/xio3130_downstream.h +@@ -0,0 +1,15 @@ ++/* ++ * TI X3130 pci express downstream port switch ++ * ++ * Copyright (C) 2022 Igor Mammedov ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#ifndef HW_PCI_BRIDGE_XIO3130_DOWNSTREAM_H ++#define HW_PCI_BRIDGE_XIO3130_DOWNSTREAM_H ++ ++#define TYPE_XIO3130_DOWNSTREAM "xio3130-downstream" ++ ++#endif ++ +-- +2.27.0 + diff --git a/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch b/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch new file mode 100644 index 0000000..5ef458c --- /dev/null +++ b/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch @@ -0,0 +1,214 @@ +From d0cd7be4d347ebe118eb8f3f2fc2eb3e3eb77e3a Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Thu, 20 Jan 2022 17:31:04 -0500 +Subject: [PATCH 5/7] qapi: Cleanup SGX related comments and restore + @section-size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [5/5] 497dbeaebb7b8f99f5f8a7de58000dcab0d0c22d +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +The SGX NUMA patches were merged into Qemu 7.0 release, we need to +clarify detailed version history information and also change +some related comments, which make SGX related comments clearer. + +The QMP command schema promises backwards compatibility as standard. +We temporarily restore "@section-size", which can avoid incompatible +API breakage. The "@section-size" will be deprecated in 7.2 version. + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Yang Zhong +Reviewed-by: Daniel P. 
Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20220120223104.437161-1-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a66bd91f030827742778a9e0da19fe55716b4a60) +Signed-off-by: Paul Lai +--- + docs/about/deprecated.rst | 13 +++++++++++++ + hw/i386/sgx.c | 11 +++++++++-- + qapi/machine.json | 4 ++-- + qapi/misc-target.json | 22 +++++++++++++++++----- + 4 files changed, 41 insertions(+), 9 deletions(-) + +diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst +index ff7488cb63..33925edf45 100644 +--- a/docs/about/deprecated.rst ++++ b/docs/about/deprecated.rst +@@ -270,6 +270,19 @@ accepted incorrect commands will return an error. Users should make sure that + all arguments passed to ``device_add`` are consistent with the documented + property types. + ++``query-sgx`` return value member ``section-size`` (since 7.0) ++'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' ++ ++Member ``section-size`` in return value elements with meta-type ``uint64`` is ++deprecated. Use ``sections`` instead. ++ ++ ++``query-sgx-capabilities`` return value member ``section-size`` (since 7.0) ++''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' ++ ++Member ``section-size`` in return value elements with meta-type ``uint64`` is ++deprecated. Use ``sections`` instead. 
++ + System accelerators + ------------------- + +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index 5de5dd0893..a2b318dd93 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -83,7 +83,7 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + ((high & MAKE_64BIT_MASK(0, 20)) << 32); + } + +-static SGXEPCSectionList *sgx_calc_host_epc_sections(void) ++static SGXEPCSectionList *sgx_calc_host_epc_sections(uint64_t *size) + { + SGXEPCSectionList *head = NULL, **tail = &head; + SGXEPCSection *section; +@@ -106,6 +106,7 @@ static SGXEPCSectionList *sgx_calc_host_epc_sections(void) + section = g_new0(SGXEPCSection, 1); + section->node = j++; + section->size = sgx_calc_section_metric(ecx, edx); ++ *size += section->size; + QAPI_LIST_APPEND(tail, section); + } + +@@ -156,6 +157,7 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) + { + SGXInfo *info = NULL; + uint32_t eax, ebx, ecx, edx; ++ uint64_t size = 0; + + int fd = qemu_open_old("/dev/sgx_vepc", O_RDWR); + if (fd < 0) { +@@ -173,7 +175,8 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) + info->sgx1 = eax & (1U << 0) ? true : false; + info->sgx2 = eax & (1U << 1) ? true : false; + +- info->sections = sgx_calc_host_epc_sections(); ++ info->sections = sgx_calc_host_epc_sections(&size); ++ info->section_size = size; + + close(fd); + +@@ -220,12 +223,14 @@ SGXInfo *qmp_query_sgx(Error **errp) + return NULL; + } + ++ SGXEPCState *sgx_epc = &pcms->sgx_epc; + info = g_new0(SGXInfo, 1); + + info->sgx = true; + info->sgx1 = true; + info->sgx2 = true; + info->flc = true; ++ info->section_size = sgx_epc->size; + info->sections = sgx_get_epc_sections_list(); + + return info; +@@ -249,6 +254,8 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) + info->sgx2 ? "enabled" : "disabled"); + monitor_printf(mon, "FLC support: %s\n", + info->flc ? 
"enabled" : "disabled"); ++ monitor_printf(mon, "size: %" PRIu64 "\n", ++ info->section_size); + + section_list = info->sections; + for (section = section_list; section; section = section->next) { +diff --git a/qapi/machine.json b/qapi/machine.json +index 16e771affc..a9f33d0f27 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -1207,7 +1207,7 @@ + # + # @memdev: memory backend linked with device + # +-# @node: the numa node ++# @node: the numa node (Since: 7.0) + # + # Since: 6.2 + ## +@@ -1288,7 +1288,7 @@ + # + # @memdev: memory backend linked with device + # +-# @node: the numa node ++# @node: the numa node (Since: 7.0) + # + # Since: 6.2 + ## +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 1022aa0184..4bc45d2474 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -344,9 +344,9 @@ + # + # @node: the numa node + # +-# @size: the size of epc section ++# @size: the size of EPC section + # +-# Since: 6.2 ++# Since: 7.0 + ## + { 'struct': 'SGXEPCSection', + 'data': { 'node': 'int', +@@ -365,7 +365,13 @@ + # + # @flc: true if FLC is supported + # +-# @sections: The EPC sections info for guest ++# @section-size: The EPC section size for guest ++# Redundant with @sections. Just for backward compatibility. ++# ++# @sections: The EPC sections info for guest (Since: 7.0) ++# ++# Features: ++# @deprecated: Member @section-size is deprecated. Use @sections instead. 
+ # + # Since: 6.2 + ## +@@ -374,6 +380,8 @@ + 'sgx1': 'bool', + 'sgx2': 'bool', + 'flc': 'bool', ++ 'section-size': { 'type': 'uint64', ++ 'features': [ 'deprecated' ] }, + 'sections': ['SGXEPCSection']}, + 'if': 'TARGET_I386' } + +@@ -390,7 +398,9 @@ + # + # -> { "execute": "query-sgx" } + # <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true, +-# "flc": true, "section-size" : 0 } } ++# "flc": true, "section-size" : 96468992, ++# "sections": [{"node": 0, "size": 67108864}, ++# {"node": 1, "size": 29360128}]} } + # + ## + { 'command': 'query-sgx', 'returns': 'SGXInfo', 'if': 'TARGET_I386' } +@@ -408,7 +418,9 @@ + # + # -> { "execute": "query-sgx-capabilities" } + # <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true, +-# "flc": true, "section-size" : 0 } } ++# "flc": true, "section-size" : 96468992, ++# "section" : [{"node": 0, "size": 67108864}, ++# {"node": 1, "size": 29360128}]} } + # + ## + { 'command': 'query-sgx-capabilities', 'returns': 'SGXInfo', 'if': 'TARGET_I386' } +-- +2.27.0 + diff --git a/kvm-qapi-machine.json-Add-cluster-id.patch b/kvm-qapi-machine.json-Add-cluster-id.patch new file mode 100644 index 0000000..2b2a22a --- /dev/null +++ b/kvm-qapi-machine.json-Add-cluster-id.patch @@ -0,0 +1,126 @@ +From e97c563f7146098119839aa146a6f25070eb7148 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:02 +0800 +Subject: [PATCH 01/16] qapi/machine.json: Add cluster-id + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [1/6] 44d7d83008c6d28485ae44f7cced792f4987b919 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +This adds cluster-id in CPU instance properties, which will be used +by arm/virt machine. 
Besides, the cluster-id is also verified or +dumped in various spots: + + * hw/core/machine.c::machine_set_cpu_numa_node() to associate + CPU with its NUMA node. + + * hw/core/machine.c::machine_numa_finish_cpu_init() to record + CPU slots with no NUMA mapping set. + + * hw/core/machine-hmp-cmds.c::hmp_hotpluggable_cpus() to dump + cluster-id. + +Signed-off-by: Gavin Shan +Reviewed-by: Yanan Wang +Acked-by: Igor Mammedov +Message-id: 20220503140304.855514-2-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 1dcf7001d4bae651129d46d5628b29e93a411d0b) +Signed-off-by: Gavin Shan +--- + hw/core/machine-hmp-cmds.c | 4 ++++ + hw/core/machine.c | 16 ++++++++++++++++ + qapi/machine.json | 6 ++++-- + 3 files changed, 24 insertions(+), 2 deletions(-) + +diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c +index 4e2f319aeb..5cb5eecbfc 100644 +--- a/hw/core/machine-hmp-cmds.c ++++ b/hw/core/machine-hmp-cmds.c +@@ -77,6 +77,10 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict) + if (c->has_die_id) { + monitor_printf(mon, " die-id: \"%" PRIu64 "\"\n", c->die_id); + } ++ if (c->has_cluster_id) { ++ monitor_printf(mon, " cluster-id: \"%" PRIu64 "\"\n", ++ c->cluster_id); ++ } + if (c->has_core_id) { + monitor_printf(mon, " core-id: \"%" PRIu64 "\"\n", c->core_id); + } +diff --git a/hw/core/machine.c b/hw/core/machine.c +index dffc3ef4ab..168f4de910 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -890,6 +890,11 @@ void machine_set_cpu_numa_node(MachineState *machine, + return; + } + ++ if (props->has_cluster_id && !slot->props.has_cluster_id) { ++ error_setg(errp, "cluster-id is not supported"); ++ return; ++ } ++ + if (props->has_socket_id && !slot->props.has_socket_id) { + error_setg(errp, "socket-id is not supported"); + return; +@@ -909,6 +914,11 @@ void machine_set_cpu_numa_node(MachineState *machine, + continue; + } + ++ if (props->has_cluster_id && ++ props->cluster_id != slot->props.cluster_id) { ++ continue; 
++ } ++ + if (props->has_die_id && props->die_id != slot->props.die_id) { + continue; + } +@@ -1203,6 +1213,12 @@ static char *cpu_slot_to_string(const CPUArchId *cpu) + } + g_string_append_printf(s, "die-id: %"PRId64, cpu->props.die_id); + } ++ if (cpu->props.has_cluster_id) { ++ if (s->len) { ++ g_string_append_printf(s, ", "); ++ } ++ g_string_append_printf(s, "cluster-id: %"PRId64, cpu->props.cluster_id); ++ } + if (cpu->props.has_core_id) { + if (s->len) { + g_string_append_printf(s, ", "); +diff --git a/qapi/machine.json b/qapi/machine.json +index d25a481ce4..4c417e32a5 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -868,10 +868,11 @@ + # @node-id: NUMA node ID the CPU belongs to + # @socket-id: socket number within node/board the CPU belongs to + # @die-id: die number within socket the CPU belongs to (since 4.1) +-# @core-id: core number within die the CPU belongs to ++# @cluster-id: cluster number within die the CPU belongs to (since 7.1) ++# @core-id: core number within cluster the CPU belongs to + # @thread-id: thread number within core the CPU belongs to + # +-# Note: currently there are 5 properties that could be present ++# Note: currently there are 6 properties that could be present + # but management should be prepared to pass through other + # properties with device_add command to allow for future + # interface extension. 
This also requires the filed names to be kept in +@@ -883,6 +884,7 @@ + 'data': { '*node-id': 'int', + '*socket-id': 'int', + '*die-id': 'int', ++ '*cluster-id': 'int', + '*core-id': 'int', + '*thread-id': 'int' + } +-- +2.31.1 + diff --git a/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch b/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch new file mode 100644 index 0000000..9010d3d --- /dev/null +++ b/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch @@ -0,0 +1,162 @@ +From 5e385a0e49a520550a83299632be175857b63f19 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:52 +0200 +Subject: [PATCH 06/16] qcow2: Add errp to rebuild_refcount_structure() + +RH-Author: Hanna Reitz +RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding +RH-Commit: [3/4] 937b89a7eab6ec6b18618d59bc1526976ad03290 (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2072379 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Instead of fprintf()-ing error messages in rebuild_refcount_structure() +and its rebuild_refcounts_write_refblocks() helper, pass them through an +Error object to qcow2_check_refcounts() (which will then print it). 
+ +Suggested-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-4-hreitz@redhat.com> +Reviewed-by: Eric Blake +(cherry picked from commit 0423f75351ab83b844a31349218b0eadd830e07a) +Signed-off-by: Hanna Reitz +--- + block/qcow2-refcount.c | 33 +++++++++++++++++++-------------- + 1 file changed, 19 insertions(+), 14 deletions(-) + +diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c +index c5669eaa51..ed0ecfaa89 100644 +--- a/block/qcow2-refcount.c ++++ b/block/qcow2-refcount.c +@@ -2465,7 +2465,8 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, + static int rebuild_refcounts_write_refblocks( + BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, + int64_t first_cluster, int64_t end_cluster, +- uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr ++ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr, ++ Error **errp + ) + { + BDRVQcow2State *s = bs->opaque; +@@ -2516,8 +2517,8 @@ static int rebuild_refcounts_write_refblocks( + nb_clusters, + &first_free_cluster); + if (refblock_offset < 0) { +- fprintf(stderr, "ERROR allocating refblock: %s\n", +- strerror(-refblock_offset)); ++ error_setg_errno(errp, -refblock_offset, ++ "ERROR allocating refblock"); + return refblock_offset; + } + +@@ -2539,6 +2540,7 @@ static int rebuild_refcounts_write_refblocks( + on_disk_reftable_entries * + REFTABLE_ENTRY_SIZE); + if (!on_disk_reftable) { ++ error_setg(errp, "ERROR allocating reftable memory"); + return -ENOMEM; + } + +@@ -2562,7 +2564,7 @@ static int rebuild_refcounts_write_refblocks( + ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, + s->cluster_size, false); + if (ret < 0) { +- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing refblock"); + return ret; + } + +@@ -2578,7 +2580,7 @@ static int rebuild_refcounts_write_refblocks( + ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, + 
s->cluster_size); + if (ret < 0) { +- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing refblock"); + return ret; + } + +@@ -2601,7 +2603,8 @@ static int rebuild_refcounts_write_refblocks( + static int rebuild_refcount_structure(BlockDriverState *bs, + BdrvCheckResult *res, + void **refcount_table, +- int64_t *nb_clusters) ++ int64_t *nb_clusters, ++ Error **errp) + { + BDRVQcow2State *s = bs->opaque; + int64_t reftable_offset = -1; +@@ -2652,7 +2655,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, + 0, *nb_clusters, + &on_disk_reftable, +- &on_disk_reftable_entries); ++ &on_disk_reftable_entries, errp); + if (reftable_size_changed < 0) { + res->check_errors++; + ret = reftable_size_changed; +@@ -2676,8 +2679,8 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + refcount_table, nb_clusters, + &first_free_cluster); + if (reftable_offset < 0) { +- fprintf(stderr, "ERROR allocating reftable: %s\n", +- strerror(-reftable_offset)); ++ error_setg_errno(errp, -reftable_offset, ++ "ERROR allocating reftable"); + res->check_errors++; + ret = reftable_offset; + goto fail; +@@ -2695,7 +2698,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + reftable_start_cluster, + reftable_end_cluster, + &on_disk_reftable, +- &on_disk_reftable_entries); ++ &on_disk_reftable_entries, errp); + if (reftable_size_changed < 0) { + res->check_errors++; + ret = reftable_size_changed; +@@ -2725,7 +2728,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, + false); + if (ret < 0) { +- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing reftable"); + goto fail; + } + +@@ -2733,7 +2736,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + ret = bdrv_pwrite(bs->file, 
reftable_offset, on_disk_reftable, + reftable_length); + if (ret < 0) { +- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing reftable"); + goto fail; + } + +@@ -2746,7 +2749,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + &reftable_offset_and_clusters, + sizeof(reftable_offset_and_clusters)); + if (ret < 0) { +- fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR setting reftable"); + goto fail; + } + +@@ -2814,11 +2817,13 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, + if (rebuild && (fix & BDRV_FIX_ERRORS)) { + BdrvCheckResult old_res = *res; + int fresh_leaks = 0; ++ Error *local_err = NULL; + + fprintf(stderr, "Rebuilding refcount structure\n"); + ret = rebuild_refcount_structure(bs, res, &refcount_table, +- &nb_clusters); ++ &nb_clusters, &local_err); + if (ret < 0) { ++ error_report_err(local_err); + goto fail; + } + +-- +2.31.1 + diff --git a/kvm-qcow2-Improve-refcount-structure-rebuilding.patch b/kvm-qcow2-Improve-refcount-structure-rebuilding.patch new file mode 100644 index 0000000..cdc92b8 --- /dev/null +++ b/kvm-qcow2-Improve-refcount-structure-rebuilding.patch @@ -0,0 +1,465 @@ +From b453cf6be8429f4438d51eb24fcf49e7d9f14db6 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:50 +0200 +Subject: [PATCH 04/16] qcow2: Improve refcount structure rebuilding + +RH-Author: Hanna Reitz +RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding +RH-Commit: [1/4] a3606b7abcaebb4930b566e95b1090aead62dfae (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2072379 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +When rebuilding the refcount structures (when qemu-img check -r found +errors with refcount = 0, but reference count > 0), the new refcount +table defaults to being put at the image file end[1]. 
There is no good +reason for that except that it means we will not have to rewrite any +refblocks we already wrote to disk. + +Changing the code to rewrite those refblocks is not too difficult, +though, so let us do that. That is beneficial for images on block +devices, where we cannot really write beyond the end of the image file. + +Use this opportunity to add extensive comments to the code, and refactor +it a bit, getting rid of the backwards-jumping goto. + +[1] Unless there is something allocated in the area pointed to by the + last refblock, so we have to write that refblock. In that case, we + try to put the reftable in there. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1519071 +Closes: https://gitlab.com/qemu-project/qemu/-/issues/941 +Reviewed-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-2-hreitz@redhat.com> +(cherry picked from commit a8c07ec287554dcefd33733f0e5888a281ddc95e) +Signed-off-by: Hanna Reitz +--- + block/qcow2-refcount.c | 332 +++++++++++++++++++++++++++++------------ + 1 file changed, 235 insertions(+), 97 deletions(-) + +diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c +index b91499410c..c5669eaa51 100644 +--- a/block/qcow2-refcount.c ++++ b/block/qcow2-refcount.c +@@ -2438,111 +2438,140 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, + } + + /* +- * Creates a new refcount structure based solely on the in-memory information +- * given through *refcount_table. All necessary allocations will be reflected +- * in that array. ++ * Helper function for rebuild_refcount_structure(). + * +- * On success, the old refcount structure is leaked (it will be covered by the +- * new refcount structure). ++ * Scan the range of clusters [first_cluster, end_cluster) for allocated ++ * clusters and write all corresponding refblocks to disk. 
The refblock ++ * and allocation data is taken from the in-memory refcount table ++ * *refcount_table[] (of size *nb_clusters), which is basically one big ++ * (unlimited size) refblock for the whole image. ++ * ++ * For these refblocks, clusters are allocated using said in-memory ++ * refcount table. Care is taken that these allocations are reflected ++ * in the refblocks written to disk. ++ * ++ * The refblocks' offsets are written into a reftable, which is ++ * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr). If ++ * that reftable is of insufficient size, it will be resized to fit. ++ * This reftable is not written to disk. ++ * ++ * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed ++ * to point to existing valid refblocks that do not need to be allocated ++ * again.) ++ * ++ * Return whether the on-disk reftable array was resized (true/false), ++ * or -errno on error. + */ +-static int rebuild_refcount_structure(BlockDriverState *bs, +- BdrvCheckResult *res, +- void **refcount_table, +- int64_t *nb_clusters) ++static int rebuild_refcounts_write_refblocks( ++ BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, ++ int64_t first_cluster, int64_t end_cluster, ++ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr ++ ) + { + BDRVQcow2State *s = bs->opaque; +- int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0; ++ int64_t cluster; + int64_t refblock_offset, refblock_start, refblock_index; +- uint32_t reftable_size = 0; +- uint64_t *on_disk_reftable = NULL; ++ int64_t first_free_cluster = 0; ++ uint64_t *on_disk_reftable = *on_disk_reftable_ptr; ++ uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr; + void *on_disk_refblock; +- int ret = 0; +- struct { +- uint64_t reftable_offset; +- uint32_t reftable_clusters; +- } QEMU_PACKED reftable_offset_and_clusters; +- +- qcow2_cache_empty(bs, s->refcount_block_cache); ++ bool reftable_grown = false; ++ int ret; + 
+-write_refblocks: +- for (; cluster < *nb_clusters; cluster++) { ++ for (cluster = first_cluster; cluster < end_cluster; cluster++) { ++ /* Check all clusters to find refblocks that contain non-zero entries */ + if (!s->get_refcount(*refcount_table, cluster)) { + continue; + } + ++ /* ++ * This cluster is allocated, so we need to create a refblock ++ * for it. The data we will write to disk is just the ++ * respective slice from *refcount_table, so it will contain ++ * accurate refcounts for all clusters belonging to this ++ * refblock. After we have written it, we will therefore skip ++ * all remaining clusters in this refblock. ++ */ ++ + refblock_index = cluster >> s->refcount_block_bits; + refblock_start = refblock_index << s->refcount_block_bits; + +- /* Don't allocate a cluster in a refblock already written to disk */ +- if (first_free_cluster < refblock_start) { +- first_free_cluster = refblock_start; +- } +- refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, +- nb_clusters, &first_free_cluster); +- if (refblock_offset < 0) { +- fprintf(stderr, "ERROR allocating refblock: %s\n", +- strerror(-refblock_offset)); +- res->check_errors++; +- ret = refblock_offset; +- goto fail; +- } ++ if (on_disk_reftable_entries > refblock_index && ++ on_disk_reftable[refblock_index]) ++ { ++ /* ++ * We can get here after a `goto write_refblocks`: We have a ++ * reftable from a previous run, and the refblock is already ++ * allocated. No need to allocate it again. 
++ */ ++ refblock_offset = on_disk_reftable[refblock_index]; ++ } else { ++ int64_t refblock_cluster_index; + +- if (reftable_size <= refblock_index) { +- uint32_t old_reftable_size = reftable_size; +- uint64_t *new_on_disk_reftable; ++ /* Don't allocate a cluster in a refblock already written to disk */ ++ if (first_free_cluster < refblock_start) { ++ first_free_cluster = refblock_start; ++ } ++ refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, ++ nb_clusters, ++ &first_free_cluster); ++ if (refblock_offset < 0) { ++ fprintf(stderr, "ERROR allocating refblock: %s\n", ++ strerror(-refblock_offset)); ++ return refblock_offset; ++ } + +- reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, +- s->cluster_size) / REFTABLE_ENTRY_SIZE; +- new_on_disk_reftable = g_try_realloc(on_disk_reftable, +- reftable_size * +- REFTABLE_ENTRY_SIZE); +- if (!new_on_disk_reftable) { +- res->check_errors++; +- ret = -ENOMEM; +- goto fail; ++ refblock_cluster_index = refblock_offset / s->cluster_size; ++ if (refblock_cluster_index >= end_cluster) { ++ /* ++ * We must write the refblock that holds this refblock's ++ * refcount ++ */ ++ end_cluster = refblock_cluster_index + 1; + } +- on_disk_reftable = new_on_disk_reftable; + +- memset(on_disk_reftable + old_reftable_size, 0, +- (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE); ++ if (on_disk_reftable_entries <= refblock_index) { ++ on_disk_reftable_entries = ++ ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, ++ s->cluster_size) / REFTABLE_ENTRY_SIZE; ++ on_disk_reftable = ++ g_try_realloc(on_disk_reftable, ++ on_disk_reftable_entries * ++ REFTABLE_ENTRY_SIZE); ++ if (!on_disk_reftable) { ++ return -ENOMEM; ++ } + +- /* The offset we have for the reftable is now no longer valid; +- * this will leak that range, but we can easily fix that by running +- * a leak-fixing check after this rebuild operation */ +- reftable_offset = -1; +- } else { +- assert(on_disk_reftable); +- } +- 
on_disk_reftable[refblock_index] = refblock_offset; ++ memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0, ++ (on_disk_reftable_entries - ++ *on_disk_reftable_entries_ptr) * ++ REFTABLE_ENTRY_SIZE); + +- /* If this is apparently the last refblock (for now), try to squeeze the +- * reftable in */ +- if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits && +- reftable_offset < 0) +- { +- uint64_t reftable_clusters = size_to_clusters(s, reftable_size * +- REFTABLE_ENTRY_SIZE); +- reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, +- refcount_table, nb_clusters, +- &first_free_cluster); +- if (reftable_offset < 0) { +- fprintf(stderr, "ERROR allocating reftable: %s\n", +- strerror(-reftable_offset)); +- res->check_errors++; +- ret = reftable_offset; +- goto fail; ++ *on_disk_reftable_ptr = on_disk_reftable; ++ *on_disk_reftable_entries_ptr = on_disk_reftable_entries; ++ ++ reftable_grown = true; ++ } else { ++ assert(on_disk_reftable); + } ++ on_disk_reftable[refblock_index] = refblock_offset; + } + ++ /* Refblock is allocated, write it to disk */ ++ + ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, + s->cluster_size, false); + if (ret < 0) { + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); +- goto fail; ++ return ret; + } + +- /* The size of *refcount_table is always cluster-aligned, therefore the +- * write operation will not overflow */ ++ /* ++ * The refblock is simply a slice of *refcount_table. ++ * Note that the size of *refcount_table is always aligned to ++ * whole clusters, so the write operation will not result in ++ * out-of-bounds accesses. 
++ */ + on_disk_refblock = (void *)((char *) *refcount_table + + refblock_index * s->cluster_size); + +@@ -2550,23 +2579,99 @@ write_refblocks: + s->cluster_size); + if (ret < 0) { + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); +- goto fail; ++ return ret; + } + +- /* Go to the end of this refblock */ ++ /* This refblock is done, skip to its end */ + cluster = refblock_start + s->refcount_block_size - 1; + } + +- if (reftable_offset < 0) { +- uint64_t post_refblock_start, reftable_clusters; ++ return reftable_grown; ++} ++ ++/* ++ * Creates a new refcount structure based solely on the in-memory information ++ * given through *refcount_table (this in-memory information is basically just ++ * the concatenation of all refblocks). All necessary allocations will be ++ * reflected in that array. ++ * ++ * On success, the old refcount structure is leaked (it will be covered by the ++ * new refcount structure). ++ */ ++static int rebuild_refcount_structure(BlockDriverState *bs, ++ BdrvCheckResult *res, ++ void **refcount_table, ++ int64_t *nb_clusters) ++{ ++ BDRVQcow2State *s = bs->opaque; ++ int64_t reftable_offset = -1; ++ int64_t reftable_length = 0; ++ int64_t reftable_clusters; ++ int64_t refblock_index; ++ uint32_t on_disk_reftable_entries = 0; ++ uint64_t *on_disk_reftable = NULL; ++ int ret = 0; ++ int reftable_size_changed = 0; ++ struct { ++ uint64_t reftable_offset; ++ uint32_t reftable_clusters; ++ } QEMU_PACKED reftable_offset_and_clusters; ++ ++ qcow2_cache_empty(bs, s->refcount_block_cache); ++ ++ /* ++ * For each refblock containing entries, we try to allocate a ++ * cluster (in the in-memory refcount table) and write its offset ++ * into on_disk_reftable[]. We then write the whole refblock to ++ * disk (as a slice of the in-memory refcount table). ++ * This is done by rebuild_refcounts_write_refblocks(). ++ * ++ * Once we have scanned all clusters, we try to find space for the ++ * reftable. 
This will dirty the in-memory refcount table (i.e. ++ * make it differ from the refblocks we have already written), so we ++ * need to run rebuild_refcounts_write_refblocks() again for the ++ * range of clusters where the reftable has been allocated. ++ * ++ * This second run might make the reftable grow again, in which case ++ * we will need to allocate another space for it, which is why we ++ * repeat all this until the reftable stops growing. ++ * ++ * (This loop will terminate, because with every cluster the ++ * reftable grows, it can accommodate a multitude of more refcounts, ++ * so that at some point this must be able to cover the reftable ++ * and all refblocks describing it.) ++ * ++ * We then convert the reftable to big-endian and write it to disk. ++ * ++ * Note that we never free any reftable allocations. Doing so would ++ * needlessly complicate the algorithm: The eventual second check ++ * run we do will clean up all leaks we have caused. ++ */ ++ ++ reftable_size_changed = ++ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, ++ 0, *nb_clusters, ++ &on_disk_reftable, ++ &on_disk_reftable_entries); ++ if (reftable_size_changed < 0) { ++ res->check_errors++; ++ ret = reftable_size_changed; ++ goto fail; ++ } ++ ++ /* ++ * There was no reftable before, so rebuild_refcounts_write_refblocks() ++ * must have increased its size (from 0 to something).
++ */ ++ assert(reftable_size_changed); ++ ++ do { ++ int64_t reftable_start_cluster, reftable_end_cluster; ++ int64_t first_free_cluster = 0; ++ ++ reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE; ++ reftable_clusters = size_to_clusters(s, reftable_length); + +- post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size); +- reftable_clusters = +- size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE); +- /* Not pretty but simple */ +- if (first_free_cluster < post_refblock_start) { +- first_free_cluster = post_refblock_start; +- } + reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, + refcount_table, nb_clusters, + &first_free_cluster); +@@ -2578,24 +2683,55 @@ write_refblocks: + goto fail; + } + +- goto write_refblocks; +- } ++ /* ++ * We need to update the affected refblocks, so re-run the ++ * write_refblocks loop for the reftable's range of clusters. ++ */ ++ assert(offset_into_cluster(s, reftable_offset) == 0); ++ reftable_start_cluster = reftable_offset / s->cluster_size; ++ reftable_end_cluster = reftable_start_cluster + reftable_clusters; ++ reftable_size_changed = ++ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, ++ reftable_start_cluster, ++ reftable_end_cluster, ++ &on_disk_reftable, ++ &on_disk_reftable_entries); ++ if (reftable_size_changed < 0) { ++ res->check_errors++; ++ ret = reftable_size_changed; ++ goto fail; ++ } ++ ++ /* ++ * If the reftable size has changed, we will need to find a new ++ * allocation, repeating the loop. ++ */ ++ } while (reftable_size_changed); + +- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { ++ /* The above loop must have run at least once */ ++ assert(reftable_offset >= 0); ++ ++ /* ++ * All allocations are done, all refblocks are written, convert the ++ * reftable to big-endian and write it to disk. 
++ */ ++ ++ for (refblock_index = 0; refblock_index < on_disk_reftable_entries; ++ refblock_index++) ++ { + cpu_to_be64s(&on_disk_reftable[refblock_index]); + } + +- ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, +- reftable_size * REFTABLE_ENTRY_SIZE, ++ ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, + false); + if (ret < 0) { + fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); + goto fail; + } + +- assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE); ++ assert(reftable_length < INT_MAX); + ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, +- reftable_size * REFTABLE_ENTRY_SIZE); ++ reftable_length); + if (ret < 0) { + fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); + goto fail; +@@ -2604,7 +2740,7 @@ write_refblocks: + /* Enter new reftable into the image header */ + reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset); + reftable_offset_and_clusters.reftable_clusters = +- cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE)); ++ cpu_to_be32(reftable_clusters); + ret = bdrv_pwrite_sync(bs->file, + offsetof(QCowHeader, refcount_table_offset), + &reftable_offset_and_clusters, +@@ -2614,12 +2750,14 @@ write_refblocks: + goto fail; + } + +- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { ++ for (refblock_index = 0; refblock_index < on_disk_reftable_entries; ++ refblock_index++) ++ { + be64_to_cpus(&on_disk_reftable[refblock_index]); + } + s->refcount_table = on_disk_reftable; + s->refcount_table_offset = reftable_offset; +- s->refcount_table_size = reftable_size; ++ s->refcount_table_size = on_disk_reftable_entries; + update_max_refcount_table_index(s); + + return 0; +-- +2.31.1 + diff --git a/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch b/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch new file mode 100644 index 0000000..9acff58 --- /dev/null +++ 
b/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch @@ -0,0 +1,92 @@ +From e6aae1d0368a152924c38775e517f4e83c1d898b Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 11 May 2022 19:49:23 -0500 +Subject: [PATCH 1/2] qemu-nbd: Pass max connections to blockdev layer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 90: Advertise MULTI_CONN on writeable NBD servers +RH-Commit: [1/2] b0e33fd125bf3523b8b9a4dead3c8bb2342bfd4e (ebblake/centos-qemu-kvm) +RH-Bugzilla: 1708300 +RH-Acked-by: Nir Soffer +RH-Acked-by: Kevin Wolf +RH-Acked-by: Daniel P. Berrangé + +The next patch wants to adjust whether the NBD server code advertises +MULTI_CONN based on whether it is known if the server limits to +exactly one client. For a server started by QMP, this information is +obtained through nbd_server_start (which can support more than one +export); but for qemu-nbd (which supports exactly one export), it is +controlled only by the command-line option -e/--shared. Since we +already have a hook function used by qemu-nbd, it's easiest to just +alter its signature to fit our needs. 
+ +Signed-off-by: Eric Blake +Message-Id: <20220512004924.417153-2-eblake@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit a5fced40212ed73c715ca298a2929dd4d99c9999) +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 8 ++++---- + include/block/nbd.h | 2 +- + qemu-nbd.c | 2 +- + 3 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 9840d25a82..add41a23af 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -30,18 +30,18 @@ typedef struct NBDServerData { + } NBDServerData; + + static NBDServerData *nbd_server; +-static bool is_qemu_nbd; ++static int qemu_nbd_connections = -1; /* Non-negative if this is qemu-nbd */ + + static void nbd_update_server_watch(NBDServerData *s); + +-void nbd_server_is_qemu_nbd(bool value) ++void nbd_server_is_qemu_nbd(int max_connections) + { +- is_qemu_nbd = value; ++ qemu_nbd_connections = max_connections; + } + + bool nbd_server_is_running(void) + { +- return nbd_server || is_qemu_nbd; ++ return nbd_server || qemu_nbd_connections >= 0; + } + + static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) +diff --git a/include/block/nbd.h b/include/block/nbd.h +index a98eb665da..c5a29ce1c6 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -344,7 +344,7 @@ void nbd_client_new(QIOChannelSocket *sioc, + void nbd_client_get(NBDClient *client); + void nbd_client_put(NBDClient *client); + +-void nbd_server_is_qemu_nbd(bool value); ++void nbd_server_is_qemu_nbd(int max_connections); + bool nbd_server_is_running(void); + void nbd_server_start(SocketAddress *addr, const char *tls_creds, + const char *tls_authz, uint32_t max_connections, +diff --git a/qemu-nbd.c b/qemu-nbd.c +index 713e7557a9..8c25ae93df 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -1087,7 +1087,7 @@ int main(int argc, char **argv) + + bs->detect_zeroes = detect_zeroes; + +- nbd_server_is_qemu_nbd(true); ++ nbd_server_is_qemu_nbd(shared); + + export_opts = g_new(BlockExportOptions, 1); + 
*export_opts = (BlockExportOptions) { +-- +2.31.1 + diff --git a/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch b/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch new file mode 100644 index 0000000..9c2ac99 --- /dev/null +++ b/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch @@ -0,0 +1,100 @@ +From a039ed652e6d2f5edcef9d5d1d3baec17ce7f929 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 04/16] qtest/numa-test: Correct CPU and NUMA association in + aarch64_numa_cpu() + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [4/6] 64e9908a179eb4fb586d662f70f275a81808e50c (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +In aarch64_numa_cpu(), the CPU and NUMA association is something +like below. Two threads in the same core/cluster/socket are +associated with two individual NUMA nodes, which is unreal as +Igor Mammedov mentioned. We don't expect the association to break +NUMA-to-socket boundary, which matches with the real world. + +NUMA-node socket cluster core thread +------------------------------------------ +0 0 0 0 0 +1 0 0 0 1 + +This corrects the topology for CPUs and their association with +NUMA nodes. After this patch is applied, the CPU and NUMA +association becomes something like below, which looks real. +Besides, socket/cluster/core/thread IDs are all checked when +the NUMA node IDs are verified. It helps to check if the CPU +topology is properly populated or not. 
+ +NUMA-node socket cluster core thread +------------------------------------------ +0 1 0 0 0 +1 0 0 0 0 + +Suggested-by: Igor Mammedov +Signed-off-by: Gavin Shan +Acked-by: Igor Mammedov +Message-id: 20220503140304.855514-5-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit e280ecb39bc1629f74ea5479d464fd1608dc8f76) +Signed-off-by: Gavin Shan +--- + tests/qtest/numa-test.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c +index aeda8c774c..32e35daaae 100644 +--- a/tests/qtest/numa-test.c ++++ b/tests/qtest/numa-test.c +@@ -224,17 +224,17 @@ static void aarch64_numa_cpu(const void *data) + g_autofree char *cli = NULL; + + cli = make_cli(data, "-machine " +- "smp.cpus=2,smp.sockets=1,smp.clusters=1,smp.cores=1,smp.threads=2 " ++ "smp.cpus=2,smp.sockets=2,smp.clusters=1,smp.cores=1,smp.threads=1 " + "-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 " +- "-numa cpu,node-id=1,thread-id=0 " +- "-numa cpu,node-id=0,thread-id=1"); ++ "-numa cpu,node-id=0,socket-id=1,cluster-id=0,core-id=0,thread-id=0 " ++ "-numa cpu,node-id=1,socket-id=0,cluster-id=0,core-id=0,thread-id=0"); + qts = qtest_init(cli); + cpus = get_cpus(qts, &resp); + g_assert(cpus); + + while ((e = qlist_pop(cpus))) { + QDict *cpu, *props; +- int64_t thread, node; ++ int64_t socket, cluster, core, thread, node; + + cpu = qobject_to(QDict, e); + g_assert(qdict_haskey(cpu, "props")); +@@ -242,12 +242,18 @@ static void aarch64_numa_cpu(const void *data) + + g_assert(qdict_haskey(props, "node-id")); + node = qdict_get_int(props, "node-id"); ++ g_assert(qdict_haskey(props, "socket-id")); ++ socket = qdict_get_int(props, "socket-id"); ++ g_assert(qdict_haskey(props, "cluster-id")); ++ cluster = qdict_get_int(props, "cluster-id"); ++ g_assert(qdict_haskey(props, "core-id")); ++ core = qdict_get_int(props, "core-id"); + g_assert(qdict_haskey(props, "thread-id")); + thread = qdict_get_int(props, 
"thread-id"); + +- if (thread == 0) { ++ if (socket == 0 && cluster == 0 && core == 0 && thread == 0) { + g_assert_cmpint(node, ==, 1); +- } else if (thread == 1) { ++ } else if (socket == 1 && cluster == 0 && core == 0 && thread == 0) { + g_assert_cmpint(node, ==, 0); + } else { + g_assert(false); +-- +2.31.1 + diff --git a/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch b/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch new file mode 100644 index 0000000..a87abc0 --- /dev/null +++ b/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch @@ -0,0 +1,68 @@ +From 66f3928b40991d8467a3da086688f73d061886c8 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 02/16] qtest/numa-test: Specify CPU topology in + aarch64_numa_cpu() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [2/6] b851e7ad59e057825392ddf75e9040cc102a0385 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +The CPU topology isn't enabled on arm/virt machine yet, but we're +going to do it in next patch. After the CPU topology is enabled by +next patch, "thread-id=1" becomes invalid because the CPU core is +preferred on arm/virt machine. It means these two CPUs have 0/1 +as their core IDs, but their thread IDs are all 0. 
It will trigger +test failure as the following message indicates: + +[14/21 qemu:qtest+qtest-aarch64 / qtest-aarch64/numa-test ERROR +1.48s killed by signal 6 SIGABRT +>>> G_TEST_DBUS_DAEMON=/home/gavin/sandbox/qemu.main/tests/dbus-vmstate-daemon.sh \ +QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon \ +QTEST_QEMU_BINARY=./qemu-system-aarch64 \ +QTEST_QEMU_IMG=./qemu-img MALLOC_PERTURB_=83 \ +/home/gavin/sandbox/qemu.main/build/tests/qtest/numa-test --tap -k +―――――――――――――――――――――――――――――――――――――――――――――― +stderr: +qemu-system-aarch64: -numa cpu,node-id=0,thread-id=1: no match found + +This fixes the issue by providing comprehensive SMP configurations +in aarch64_numa_cpu(). The SMP configurations aren't used before +the CPU topology is enabled in next patch. + +Signed-off-by: Gavin Shan +Reviewed-by: Yanan Wang +Message-id: 20220503140304.855514-3-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit ac7199a2523ce2ccf8e685087a5d177eeca89b09) +Signed-off-by: Gavin Shan +--- + tests/qtest/numa-test.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c +index 90bf68a5b3..aeda8c774c 100644 +--- a/tests/qtest/numa-test.c ++++ b/tests/qtest/numa-test.c +@@ -223,7 +223,8 @@ static void aarch64_numa_cpu(const void *data) + QTestState *qts; + g_autofree char *cli = NULL; + +- cli = make_cli(data, "-machine smp.cpus=2 " ++ cli = make_cli(data, "-machine " ++ "smp.cpus=2,smp.sockets=1,smp.clusters=1,smp.cores=1,smp.threads=2 " + "-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 " + "-numa cpu,node-id=1,thread-id=0 " + "-numa cpu,node-id=0,thread-id=1"); +-- +2.31.1 + diff --git a/kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch b/kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch new file mode 100644 index 0000000..edf8ec9 --- /dev/null +++ b/kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch @@ -0,0 +1,69 @@ +From 
3541c9fc2c2dd5cf7dd583bc5645d82ea928d9e8 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 10 Dec 2021 10:07:40 +0100 +Subject: [PATCH 1/2] redhat: Add rhel8.6.0 machine type for s390x +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 90: Add rhel8.6.0 machine type for s390x +RH-Commit: [1/1] 91961fc52d708e6b30d7361fbab3572c5b5c1859 +RH-Bugzilla: 2005325 +RH-Acked-by: Greg Kurz +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2005325 + +The new machine type has better default values for the upcoming +"generation 16" mainframe. + +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index cf13c457d6..9795eb9406 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1103,10 +1103,21 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + DEFINE_CCW_MACHINE(2_4, "2.4", false); + #endif + ++static void ccw_machine_rhel860_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel860_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", true); ++ + static void ccw_machine_rhel850_instance_options(MachineState *machine) + { + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; + ++ ccw_machine_rhel860_instance_options(machine); ++ + s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); + + s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); +@@ -1118,10 +1129,11 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { ++ ccw_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, 
hw_compat_rhel_8_5_len); + mc->smp_props.prefer_sockets = true; + } +-DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); ++DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); + + static void ccw_machine_rhel840_instance_options(MachineState *machine) + { +-- +2.27.0 + diff --git a/kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch b/kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch new file mode 100644 index 0000000..760a5fd --- /dev/null +++ b/kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch @@ -0,0 +1,76 @@ +From 300cdf7f5b8b34e111c5e4141684af7329be46d9 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Mon, 13 Dec 2021 15:42:41 +0100 +Subject: [PATCH 2/2] redhat: Define pseries-rhel8.6.0 machine type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Greg Kurz +RH-MergeRequest: 92: redhat: Define pseries-rhel8.6.0 machine type +RH-Commit: [1/1] 3c0f59d7ddf4bb22f382b5df7daa136730b9e866 +RH-Bugzilla: 2031041 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: David Gibson (Red Hat) +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Laurent Vivier + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2031041 + +BRANCH: rhel-8.6.0 + +UPSTREAM: RHEL only + +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=41989147 + +Signed-off-by: Greg Kurz +--- + hw/ppc/spapr.c | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 2f27888d8a..32cfe8f006 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5170,6 +5170,19 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) + mc->max_cpus = 384; + } + ++/* ++ * pseries-rhel8.6.0 ++ * like pseries-6.2 ++ */ ++ ++static void spapr_machine_rhel860_class_options(MachineClass *mc) ++{ ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel860, 
"rhel8.6.0", true); ++ + /* + * pseries-rhel8.5.0 + * like pseries-6.0 +@@ -5179,15 +5192,14 @@ static void spapr_machine_rhel850_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + +- /* The default machine type must apply the RHEL specific defaults */ +- spapr_machine_rhel_default_class_options(mc); ++ spapr_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, + hw_compat_rhel_8_5_len); + smc->pre_6_2_numa_affinity = true; + mc->smp_props.prefer_sockets = true; + } + +-DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); ++DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", false); + + /* + * pseries-rhel8.4.0 +-- +2.27.0 + diff --git a/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch b/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch new file mode 100644 index 0000000..f027c45 --- /dev/null +++ b/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch @@ -0,0 +1,106 @@ +From 236f216309261bc924e49014267998fdc2ef7f46 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 Jul 2022 16:55:34 +0200 +Subject: [PATCH 28/32] redhat: Update linux-headers/linux/kvm.h to v5.18-rc6 + +RH-Author: Thomas Huth +RH-MergeRequest: 109: Honor storage keys during emulation of I/O instructions +RH-Commit: [1/2] f306d7ff8efa64b14158388b95815ac556a25d8a (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2111994 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Claudio Imbrenda + +Upstream Status: RHEL-only +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2111994 + +Based on upstream commit e4082063e47e9731dbeb1c26174c17f6038f577f +("linux-headers: Update to v5.18-rc6"), but this is focusing on +the file linux-headers/linux/kvm.h only (since the other changes +related to the VFIO renaming might break some stuff). 
+ +Signed-off-by: Thomas Huth +--- + linux-headers/linux/kvm.h | 27 +++++++++++++++++++++------ + 1 file changed, 21 insertions(+), 6 deletions(-) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index d232feaae9..0d05d02ee4 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -445,7 +445,11 @@ struct kvm_run { + #define KVM_SYSTEM_EVENT_RESET 2 + #define KVM_SYSTEM_EVENT_CRASH 3 + __u32 type; +- __u64 flags; ++ __u32 ndata; ++ union { ++ __u64 flags; ++ __u64 data[16]; ++ }; + } system_event; + /* KVM_EXIT_S390_STSI */ + struct { +@@ -562,9 +566,12 @@ struct kvm_s390_mem_op { + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + union { +- __u8 ar; /* the access register number */ ++ struct { ++ __u8 ar; /* the access register number */ ++ __u8 key; /* access key, ignored if flag unset */ ++ }; + __u32 sida_offset; /* offset into the sida */ +- __u8 reserved[32]; /* should be set to 0 */ ++ __u8 reserved[32]; /* ignored */ + }; + }; + /* types for kvm_s390_mem_op->op */ +@@ -572,9 +579,12 @@ struct kvm_s390_mem_op { + #define KVM_S390_MEMOP_LOGICAL_WRITE 1 + #define KVM_S390_MEMOP_SIDA_READ 2 + #define KVM_S390_MEMOP_SIDA_WRITE 3 ++#define KVM_S390_MEMOP_ABSOLUTE_READ 4 ++#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 + /* flags for kvm_s390_mem_op->flags */ + #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) + #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) ++#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) + + /* for KVM_INTERRUPT */ + struct kvm_interrupt { +@@ -1134,6 +1144,12 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_VM_GPA_BITS 207 + #define KVM_CAP_XSAVE2 208 + #define KVM_CAP_SYS_ATTRIBUTES 209 ++#define KVM_CAP_PPC_AIL_MODE_3 210 ++#define KVM_CAP_S390_MEM_OP_EXTENSION 211 ++#define KVM_CAP_PMU_CAPABILITY 212 ++#define KVM_CAP_DISABLE_QUIRKS2 213 ++/* #define KVM_CAP_VM_TSC_CONTROL 214 */ ++#define KVM_CAP_SYSTEM_EVENT_DATA 215 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -1624,9 +1640,6 
@@ struct kvm_enc_region { + #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) + #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) + +-/* Available with KVM_CAP_XSAVE2 */ +-#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) +- + struct kvm_s390_pv_sec_parm { + __u64 origin; + __u64 length; +@@ -1973,6 +1986,8 @@ struct kvm_dirty_gfn { + #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) + #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + ++#define KVM_PMU_CAP_DISABLE (1 << 0) ++ + /** + * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. + * @flags: Some extra information for header, always 0 for now. +-- +2.31.1 + diff --git a/kvm-rhel-machine-types-x86-set-prefer_sockets.patch b/kvm-rhel-machine-types-x86-set-prefer_sockets.patch new file mode 100644 index 0000000..d7bfc96 --- /dev/null +++ b/kvm-rhel-machine-types-x86-set-prefer_sockets.patch @@ -0,0 +1,52 @@ +From 0f0cbd57a8fe8f463941656f5bc75ae5754c3d2b Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 7 Dec 2021 18:39:47 +0000 +Subject: [PATCH 6/6] rhel machine types/x86: set prefer_sockets + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 96: Fixup x86 prefer_sockets +RH-Commit: [1/1] 29578bcc2f5d3408c155c155cdfa10b7a12faf4d +RH-Bugzilla: 2029582 +RH-Acked-by: Igor Mammedov +RH-Acked-by: quintela1 +RH-Acked-by: Cornelia Huck + +When I fixed up the machine types for 8.5 I missed the + prefer_sockets = true + +add them in; it looks like Power, ARM already have them, and I see them +in thuth's s390 patch. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 37fab00733..c30057c443 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1020,6 +1020,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + m->alias = "pc"; + m->is_default = 1; ++ m->smp_props.prefer_sockets = true; + } + + static void pc_init_rhel760(MachineState *machine) +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 78876e1101..f6e77bca0e 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -662,6 +662,7 @@ static void pc_q35_machine_rhel850_options(MachineClass *m) + hw_compat_rhel_8_5_len); + compat_props_add(m->compat_props, pc_rhel_8_5_compat, + pc_rhel_8_5_compat_len); ++ m->smp_props.prefer_sockets = true; + } + + DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, +-- +2.27.0 + diff --git a/kvm-s390x-css-fix-PMCW-invalid-mask.patch b/kvm-s390x-css-fix-PMCW-invalid-mask.patch new file mode 100644 index 0000000..959eea9 --- /dev/null +++ b/kvm-s390x-css-fix-PMCW-invalid-mask.patch @@ -0,0 +1,58 @@ +From f3125f6379cbc070e9acaf58d0ec37972992744b Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 6 Apr 2022 10:56:26 +0200 +Subject: [PATCH 4/5] s390x/css: fix PMCW invalid mask + +RH-Author: Thomas Huth +RH-MergeRequest: 145: s390x/css: fix PMCW invalid mask +RH-Commit: [1/1] fbf192f651aa668af56ca5c77455595fcdb19508 +RH-Bugzilla: 2071070 +RH-Acked-by: Jon Maloy +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2071070 + +commit 2df59b73e0864f021f6179f32f7ed364f6d4f38d +Author: Nico Boehr +Date: Thu Dec 16 14:16:57 2021 +0100 + + s390x/css: fix PMCW invalid mask + + Previously, we required bits 5, 6 and 7 to be zero (0x07 == 0b111). 
But, + as per the principles of operation, bit 5 is ignored in MSCH and bits 0, + 1, 6 and 7 need to be zero. + + As both PMCW_FLAGS_MASK_INVALID and ioinst_schib_valid() are only used + by ioinst_handle_msch(), adjust the mask accordingly. + + Fixes: db1c8f53bfb1 ("s390: Channel I/O basic definitions.") + Signed-off-by: Nico Boehr + Reviewed-by: Pierre Morel + Reviewed-by: Halil Pasic + Reviewed-by: Janosch Frank + Reviewed-by: Cornelia Huck + Message-Id: <20211216131657.1057978-1-nrb@linux.ibm.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + include/hw/s390x/ioinst.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/hw/s390x/ioinst.h b/include/hw/s390x/ioinst.h +index 3771fff9d4..ea8d0f2444 100644 +--- a/include/hw/s390x/ioinst.h ++++ b/include/hw/s390x/ioinst.h +@@ -107,7 +107,7 @@ QEMU_BUILD_BUG_MSG(sizeof(PMCW) != 28, "size of PMCW is wrong"); + #define PMCW_FLAGS_MASK_MP 0x0004 + #define PMCW_FLAGS_MASK_TF 0x0002 + #define PMCW_FLAGS_MASK_DNV 0x0001 +-#define PMCW_FLAGS_MASK_INVALID 0x0700 ++#define PMCW_FLAGS_MASK_INVALID 0xc300 + + #define PMCW_CHARS_MASK_ST 0x00e00000 + #define PMCW_CHARS_MASK_MBFC 0x00000004 +-- +2.27.0 + diff --git a/kvm-s390x.conf b/kvm-s390x.conf new file mode 100644 index 0000000..d82b818 --- /dev/null +++ b/kvm-s390x.conf @@ -0,0 +1,19 @@ +# User changes in this file are preserved across upgrades. +# +# Setting "modprobe kvm nested=1" only enables Nested Virtualization until +# the next reboot or module reload. Uncomment the option below to enable +# the feature permanently. +# +#options kvm nested=1 +# +# +# Setting "modprobe kvm hpage=1" only enables Huge Page Backing (1MB) +# support until the next reboot or module reload. Uncomment the option +# below to enable the feature permanently. +# +# Note: - Incompatible with "nested=1". Loading the module will fail. 
+# - Dirty page logging will be performed on a 1MB (not 4KB) basis, +# which can result in a lot of data having to be transferred during +# migration, and therefore taking very long to converge. +# +#options kvm hpage=1 diff --git a/kvm-setup b/kvm-setup new file mode 100644 index 0000000..3bfedf6 --- /dev/null +++ b/kvm-setup @@ -0,0 +1,49 @@ +#! /bin/bash + +kvm_setup_powerpc () { + if grep '^platform[[:space:]]*:[[:space:]]*PowerNV' /proc/cpuinfo > /dev/null; then + # PowerNV platform, which is KVM HV capable + + if [ -z "$SUBCORES" ]; then + SUBCORES=1 + fi + + # Step 1. Load the KVM HV module + if ! modprobe -b kvm_hv; then + return + fi + + # On POWER8 a host core can only run threads of a single + # guest, meaning that SMT must be disabled on the host in + # order to run KVM guests. (Also applies to POWER7, but we + # don't support that). + # + # POWER9 doesn't have this limitation (though it will for hash + # guests on radix host when that's implemented). So, only set + # up subcores and disable SMT for POWER8. + if grep '^cpu[[:space:]]*:[[:space:]]*POWER8' /proc/cpuinfo > /dev/null; then + # Step 2. Configure subcore mode + /usr/sbin/ppc64_cpu --subcores-per-core=$SUBCORES + + # Step 3. 
Disable SMT (multithreading) + /usr/sbin/ppc64_cpu --smt=off + fi + fi +} + +kvm_setup_s390x () { + if grep -q "^features.*sie" /proc/cpuinfo; then + modprobe kvm + fi +} + +case $(uname -m) in + ppc64|ppc64le) + kvm_setup_powerpc + ;; + s390x) + kvm_setup_s390x + ;; +esac + +exit 0 diff --git a/kvm-setup.service b/kvm-setup.service new file mode 100644 index 0000000..9c4bf97 --- /dev/null +++ b/kvm-setup.service @@ -0,0 +1,14 @@ +[Unit] +Description=Perform system configuration to prepare system to run KVM guests +# Offlining CPUs can cause irqbalance to throw warnings if it's running +Before=irqbalance.service +# libvirtd reads CPU topology at startup, so change it before +Before=libvirtd.service + +[Service] +Type=oneshot +EnvironmentFile=-/etc/sysconfig/kvm +ExecStart=/usr/lib/systemd/kvm-setup + +[Install] +WantedBy=multi-user.target diff --git a/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch b/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch new file mode 100644 index 0000000..c6fcf61 --- /dev/null +++ b/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch @@ -0,0 +1,131 @@ +From afe1a63fe0cf863e024889edd82b9a380bfa8230 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Wed, 5 Jan 2022 12:38:47 +0000 +Subject: [PATCH 2/6] softmmu: fix device deletion events with -device JSON + syntax +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 103: Fix hot unplug of devices created with -device JSON syntax +RH-Commit: [1/1] 64cbc78bcb46bdb24d5f589ceb5ad598c388e447 +RH-Bugzilla: 2033279 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth +RH-Acked-by: Jano Tomko +RH-Acked-by: Daniel P. Berrangé + +The -device JSON syntax impl leaks a reference on the created +DeviceState instance. 
As a result when you hot-unplug the +device, the device_finalize method won't be called and thus +it will fail to emit the required DEVICE_DELETED event. + +A 'json-cli' feature was previously added against the +'device_add' QMP command QAPI schema to indicate to mgmt +apps that -device supported JSON syntax. Given the hotplug +bug that feature flag is not usable for its purpose, so +we add a new 'json-cli-hotplug' feature to indicate the +-device supports JSON without breaking hotplug. + +Fixes: 5dacda5167560b3af8eadbce5814f60ba44b467e +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/802 +Signed-off-by: Daniel P. Berrangé +Message-Id: <20220105123847.4047954-2-berrange@redhat.com> +Reviewed-by: Laurent Vivier +Tested-by: Ján Tomko +Reviewed-by: Thomas Huth +Signed-off-by: Kevin Wolf +(cherry picked from commit 64b4529a432507ee84a924be69a03432639e87ba) +Signed-off-by: Kevin Wolf +--- + qapi/qdev.json | 5 ++++- + softmmu/vl.c | 4 +++- + tests/qtest/device-plug-test.c | 19 +++++++++++++++++++ + 3 files changed, 26 insertions(+), 2 deletions(-) + +diff --git a/qapi/qdev.json b/qapi/qdev.json +index 69656b14df..26cd10106b 100644 +--- a/qapi/qdev.json ++++ b/qapi/qdev.json +@@ -44,6 +44,9 @@ + # @json-cli: If present, the "-device" command line option supports JSON + # syntax with a structure identical to the arguments of this + # command. 
++# @json-cli-hotplug: If present, the "-device" command line option supports JSON ++# syntax without the reference counting leak that broke ++# hot-unplug + # + # Notes: + # +@@ -74,7 +77,7 @@ + { 'command': 'device_add', + 'data': {'driver': 'str', '*bus': 'str', '*id': 'str'}, + 'gen': false, # so we can get the additional arguments +- 'features': ['json-cli'] } ++ 'features': ['json-cli', 'json-cli-hotplug'] } + + ## + # @device_del: +diff --git a/softmmu/vl.c b/softmmu/vl.c +index d46b8fb4ab..b3829e2edd 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -2690,6 +2690,7 @@ static void qemu_create_cli_devices(void) + qemu_opts_foreach(qemu_find_opts("device"), + device_init_func, NULL, &error_fatal); + QTAILQ_FOREACH(opt, &device_opts, next) { ++ DeviceState *dev; + loc_push_restore(&opt->loc); + /* + * TODO Eventually we should call qmp_device_add() here to make sure it +@@ -2698,7 +2699,8 @@ static void qemu_create_cli_devices(void) + * from the start, so call qdev_device_add_from_qdict() directly for + * now. + */ +- qdev_device_add_from_qdict(opt->opts, true, &error_fatal); ++ dev = qdev_device_add_from_qdict(opt->opts, true, &error_fatal); ++ object_unref(OBJECT(dev)); + loc_pop(&opt->loc); + } + rom_reset_order_override(); +diff --git a/tests/qtest/device-plug-test.c b/tests/qtest/device-plug-test.c +index 559d47727a..ad79bd4c14 100644 +--- a/tests/qtest/device-plug-test.c ++++ b/tests/qtest/device-plug-test.c +@@ -77,6 +77,23 @@ static void test_pci_unplug_request(void) + qtest_quit(qtest); + } + ++static void test_pci_unplug_json_request(void) ++{ ++ QTestState *qtest = qtest_initf( ++ "-device '{\"driver\": \"virtio-mouse-pci\", \"id\": \"dev0\"}'"); ++ ++ /* ++ * Request device removal. As the guest is not running, the request won't ++ * be processed. However during system reset, the removal will be ++ * handled, removing the device. 
++ */ ++ device_del(qtest, "dev0"); ++ system_reset(qtest); ++ wait_device_deleted_event(qtest, "dev0"); ++ ++ qtest_quit(qtest); ++} ++ + static void test_ccw_unplug(void) + { + QTestState *qtest = qtest_initf("-device virtio-balloon-ccw,id=dev0"); +@@ -145,6 +162,8 @@ int main(int argc, char **argv) + */ + qtest_add_func("/device-plug/pci-unplug-request", + test_pci_unplug_request); ++ qtest_add_func("/device-plug/pci-unplug-json-request", ++ test_pci_unplug_json_request); + + if (!strcmp(arch, "s390x")) { + qtest_add_func("/device-plug/ccw-unplug", +-- +2.27.0 + diff --git a/kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch b/kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch new file mode 100644 index 0000000..519c48d --- /dev/null +++ b/kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch @@ -0,0 +1,175 @@ +From fe4e22b9ccf2eb55d61eccf5050fb7aeafb5fe20 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 14:51:06 -0400 +Subject: [PATCH 3/3] softmmu/physmem: Introduce MemTxAttrs::memory field and + MEMTX_ACCESS_ERROR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 151: hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR +RH-Commit: [3/3] b1ebc1e99f21ba0b9eccb284e260b56c7a8e64d8 (jmaloy/qemu-kvm) +RH-Bugzilla: 1999236 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236 +Upstream: Merged +CVE: CVE-2021-3750 +Conflicts: memalign.h has not been introduced in this version. Instead, + we include osdep.h where the function prototypes are to be + found. + +commit 3ab6fdc91b72e156da22848f0003ff4225690ced +Author: Philippe Mathieu-Daudé +Date: Wed Dec 15 19:24:21 2021 +0100 + + softmmu/physmem: Introduce MemTxAttrs::memory field and MEMTX_ACCESS_ERROR + + Add the 'memory' bit to the memory attributes to restrict bus + controller accesses to memories. 
+ + Introduce flatview_access_allowed() to check bus permission + before running any bus transaction. + + Have read/write accessors return MEMTX_ACCESS_ERROR if an access is + restricted. + + There is no change for the default case where 'memory' is not set. + + Signed-off-by: Philippe Mathieu-Daudé + Message-Id: <20211215182421.418374-4-philmd@redhat.com> + Reviewed-by: Richard Henderson + Reviewed-by: Stefan Hajnoczi + [thuth: Replaced MEMTX_BUS_ERROR with MEMTX_ACCESS_ERROR, remove "inline"] + Signed-off-by: Thomas Huth + +(cherry picked from commit 3ab6fdc91b72e156da22848f0003ff4225690ced) +Signed-off-by: Jon Maloy +--- + include/exec/memattrs.h | 9 +++++++++ + softmmu/physmem.c | 45 +++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 52 insertions(+), 2 deletions(-) + +diff --git a/include/exec/memattrs.h b/include/exec/memattrs.h +index 95f2d20d55..9fb98bc1ef 100644 +--- a/include/exec/memattrs.h ++++ b/include/exec/memattrs.h +@@ -35,6 +35,14 @@ typedef struct MemTxAttrs { + unsigned int secure:1; + /* Memory access is usermode (unprivileged) */ + unsigned int user:1; ++ /* ++ * Bus interconnect and peripherals can access anything (memories, ++ * devices) by default. By setting the 'memory' bit, bus transaction ++ * are restricted to "normal" memories (per the AMBA documentation) ++ * versus devices. Access to devices will be logged and rejected ++ * (see MEMTX_ACCESS_ERROR). 
++ */ ++ unsigned int memory:1; + /* Requester ID (for MSI for example) */ + unsigned int requester_id:16; + /* Invert endianness for this page */ +@@ -66,6 +74,7 @@ typedef struct MemTxAttrs { + #define MEMTX_OK 0 + #define MEMTX_ERROR (1U << 0) /* device returned an error */ + #define MEMTX_DECODE_ERROR (1U << 1) /* nothing at that address */ ++#define MEMTX_ACCESS_ERROR (1U << 2) /* access denied */ + typedef uint32_t MemTxResult; + + #endif +diff --git a/softmmu/physmem.c b/softmmu/physmem.c +index 483a31be81..4d0ef5f92f 100644 +--- a/softmmu/physmem.c ++++ b/softmmu/physmem.c +@@ -41,6 +41,8 @@ + #include "qemu/config-file.h" + #include "qemu/error-report.h" + #include "qemu/qemu-print.h" ++#include "qemu/log.h" ++#include "qemu/osdep.h" + #include "exec/memory.h" + #include "exec/ioport.h" + #include "sysemu/dma.h" +@@ -2759,6 +2761,33 @@ static bool prepare_mmio_access(MemoryRegion *mr) + return release_lock; + } + ++/** ++ * flatview_access_allowed ++ * @mr: #MemoryRegion to be accessed ++ * @attrs: memory transaction attributes ++ * @addr: address within that memory region ++ * @len: the number of bytes to access ++ * ++ * Check if a memory transaction is allowed. ++ * ++ * Returns: true if transaction is allowed, false if denied. ++ */ ++static bool flatview_access_allowed(MemoryRegion *mr, MemTxAttrs attrs, ++ hwaddr addr, hwaddr len) ++{ ++ if (likely(!attrs.memory)) { ++ return true; ++ } ++ if (memory_region_is_ram(mr)) { ++ return true; ++ } ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "Invalid access to non-RAM device at " ++ "addr 0x%" HWADDR_PRIX ", size %" HWADDR_PRIu ", " ++ "region '%s'\n", addr, len, memory_region_name(mr)); ++ return false; ++} ++ + /* Called within RCU critical section. 
*/ + static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, + MemTxAttrs attrs, +@@ -2773,7 +2802,10 @@ static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, + const uint8_t *buf = ptr; + + for (;;) { +- if (!memory_access_is_direct(mr, true)) { ++ if (!flatview_access_allowed(mr, attrs, addr1, l)) { ++ result |= MEMTX_ACCESS_ERROR; ++ /* Keep going. */ ++ } else if (!memory_access_is_direct(mr, true)) { + release_lock |= prepare_mmio_access(mr); + l = memory_access_size(mr, l, addr1); + /* XXX: could force current_cpu to NULL to avoid +@@ -2818,6 +2850,9 @@ static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, + + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, true, attrs); ++ if (!flatview_access_allowed(mr, attrs, addr, len)) { ++ return MEMTX_ACCESS_ERROR; ++ } + return flatview_write_continue(fv, addr, attrs, buf, len, + addr1, l, mr); + } +@@ -2836,7 +2871,10 @@ MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr, + + fuzz_dma_read_cb(addr, len, mr); + for (;;) { +- if (!memory_access_is_direct(mr, false)) { ++ if (!flatview_access_allowed(mr, attrs, addr1, l)) { ++ result |= MEMTX_ACCESS_ERROR; ++ /* Keep going. 
*/ ++ } else if (!memory_access_is_direct(mr, false)) { + /* I/O case */ + release_lock |= prepare_mmio_access(mr); + l = memory_access_size(mr, l, addr1); +@@ -2879,6 +2917,9 @@ static MemTxResult flatview_read(FlatView *fv, hwaddr addr, + + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, false, attrs); ++ if (!flatview_access_allowed(mr, attrs, addr, len)) { ++ return MEMTX_ACCESS_ERROR; ++ } + return flatview_read_continue(fv, addr, attrs, buf, len, + addr1, l, mr); + } +-- +2.27.0 + diff --git a/kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch b/kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch new file mode 100644 index 0000000..62f7037 --- /dev/null +++ b/kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch @@ -0,0 +1,80 @@ +From 916423392b46167c6683b0240610bb5a745590da Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 14:51:06 -0400 +Subject: [PATCH 2/3] softmmu/physmem: Simplify flatview_write and + address_space_access_valid +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 151: hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR +RH-Commit: [2/3] daabe41eefd5c519def592e374fa368e32a680d3 (jmaloy/qemu-kvm) +RH-Bugzilla: 1999236 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236 +Upstream: Merged +CVE: CVE-2021-3750 + +commit 58e74682baf4e1ad26b064d8c02e5bc99c75c5d9 +Author: Philippe Mathieu-Daudé +Date: Wed Dec 15 19:24:20 2021 +0100 + + softmmu/physmem: Simplify flatview_write and address_space_access_valid + + Remove unuseful local 'result' variables. 
+ + Reviewed-by: Peter Xu + Reviewed-by: David Hildenbrand + Reviewed-by: Alexander Bulekov + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Philippe Mathieu-Daudé + Message-Id: <20211215182421.418374-3-philmd@redhat.com> + Signed-off-by: Thomas Huth + +(cherry picked from commit 58e74682baf4e1ad26b064d8c02e5bc99c75c5d9) +Signed-off-by: Jon Maloy +--- + softmmu/physmem.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +diff --git a/softmmu/physmem.c b/softmmu/physmem.c +index 3524c04c2a..483a31be81 100644 +--- a/softmmu/physmem.c ++++ b/softmmu/physmem.c +@@ -2815,14 +2815,11 @@ static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, + hwaddr l; + hwaddr addr1; + MemoryRegion *mr; +- MemTxResult result = MEMTX_OK; + + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, true, attrs); +- result = flatview_write_continue(fv, addr, attrs, buf, len, +- addr1, l, mr); +- +- return result; ++ return flatview_write_continue(fv, addr, attrs, buf, len, ++ addr1, l, mr); + } + + /* Called within RCU critical section. 
*/ +@@ -3119,12 +3116,10 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, + MemTxAttrs attrs) + { + FlatView *fv; +- bool result; + + RCU_READ_LOCK_GUARD(); + fv = address_space_to_flatview(as); +- result = flatview_access_valid(fv, addr, len, is_write, attrs); +- return result; ++ return flatview_access_valid(fv, addr, len, is_write, attrs); + } + + static hwaddr +-- +2.27.0 + diff --git a/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch b/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch new file mode 100644 index 0000000..8fd2e16 --- /dev/null +++ b/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch @@ -0,0 +1,54 @@ +From 74b3e92dcb9e343e135a681259514b4fd28086ea Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 6 May 2022 15:25:09 +0200 +Subject: [PATCH 4/5] sysemu: tpm: Add a stub function for TPM_IS_CRB + +RH-Author: Eric Auger +RH-MergeRequest: 84: vfio/common: Remove spurious tpm-crb-cmd misalignment warning +RH-Commit: [1/2] 0ab55ca1aa12a3a7cbdef5a378928f75e030e536 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2037612 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2037612 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45166961 +Upstream Status: YES +Tested: With TPM-CRB and VFIO + +In a subsequent patch, VFIO will need to recognize if +a memory region owner is a TPM CRB device. Hence VFIO +needs to use TPM_IS_CRB() even if CONFIG_TPM is unset. So +let's add a stub function. 
+ +Signed-off-by: Eric Auger +Suggested-by: Cornelia Huck +Reviewed-by: Stefan Berger +Link: https://lore.kernel.org/r/20220506132510.1847942-2-eric.auger@redhat.com +Signed-off-by: Alex Williamson +(cherry picked from commit 4168cdad398843ed53d650a27651868b4d3e21c9) +Signed-off-by: Eric Auger +--- + include/sysemu/tpm.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h +index 68b2206463..fb40e30ff6 100644 +--- a/include/sysemu/tpm.h ++++ b/include/sysemu/tpm.h +@@ -80,6 +80,12 @@ static inline TPMVersion tpm_get_version(TPMIf *ti) + #define tpm_init() (0) + #define tpm_cleanup() + ++/* needed for an alignment check in non-tpm code */ ++static inline Object *TPM_IS_CRB(Object *obj) ++{ ++ return NULL; ++} ++ + #endif /* CONFIG_TPM */ + + #endif /* QEMU_TPM_H */ +-- +2.31.1 + diff --git a/kvm-target-arm-deprecate-named-CPU-models.patch b/kvm-target-arm-deprecate-named-CPU-models.patch new file mode 100644 index 0000000..dbe8d24 --- /dev/null +++ b/kvm-target-arm-deprecate-named-CPU-models.patch @@ -0,0 +1,129 @@ +From 1f8528b71d96c01dd6106f11681f4a4e2776ef5f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 21 Mar 2022 12:05:42 +0000 +Subject: [PATCH 06/18] target/arm: deprecate named CPU models +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [6/6] afddeb9e898206fd04499f01c48caf7dc1a8b8ef (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +KVM requires use of the 'host' CPU model, so named CPU models are only +needed for TCG. Since we don't consider TCG to be supported we can +deprecate all the named CPU models. TCG users can rely on 'max' model. 
+ +Note: this has the effect of deprecating the default built-in CPU +model 'cortex-a57'. Applications using QEMU are expected to make an +explicit choice about which CPU model they want, since no builtin +default can suit all purposes. + +https://bugzilla.redhat.com/show_bug.cgi?id=2060839 +Signed-off-by: Daniel P. Berrangé +--- + target/arm/cpu-qom.h | 1 + + target/arm/cpu.c | 5 +++++ + target/arm/cpu.h | 2 ++ + target/arm/cpu64.c | 8 +++++++- + target/arm/helper.c | 2 ++ + 5 files changed, 17 insertions(+), 1 deletion(-) + +diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h +index 64c44cef2d..82e97249bc 100644 +--- a/target/arm/cpu-qom.h ++++ b/target/arm/cpu-qom.h +@@ -35,6 +35,7 @@ typedef struct ARMCPUInfo { + const char *name; + void (*initfn)(Object *obj); + void (*class_init)(ObjectClass *oc, void *data); ++ const char *deprecation_note; + } ARMCPUInfo; + + void arm_cpu_register(const ARMCPUInfo *info); +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 5d4ca7a227..c74b0fb462 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2105,8 +2105,13 @@ static void arm_cpu_instance_init(Object *obj) + static void cpu_register_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + acc->info = data; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void arm_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 23879de5fa..c0c9f680e5 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -33,6 +33,8 @@ + #define KVM_HAVE_MCE_INJECTION 1 + #endif + ++#define RHEL_CPU_DEPRECATION "use 'host' / 'max'" ++ + #define EXCP_UDEF 1 /* undefined instruction */ + #define EXCP_SWI 2 /* software interrupt */ + #define EXCP_PREFETCH_ABORT 3 +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index e80b831073..c8f152891c 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -975,7 
+975,8 @@ static void aarch64_a64fx_initfn(Object *obj) + #endif /* disabled for RHEL */ + + static const ARMCPUInfo aarch64_cpus[] = { +- { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, ++ { .name = "cortex-a57", .initfn = aarch64_a57_initfn, ++ .deprecation_note = RHEL_CPU_DEPRECATION }, + #if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, + { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, +@@ -1052,8 +1053,13 @@ static void aarch64_cpu_instance_init(Object *obj) + static void cpu_register_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + acc->info = data; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void aarch64_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/helper.c b/target/arm/helper.c +index 7d14650615..3d34f63e49 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -8560,6 +8560,7 @@ void arm_cpu_list(void) + static void arm_cpu_add_definition(gpointer data, gpointer user_data) + { + ObjectClass *oc = data; ++ CPUClass *cc = CPU_CLASS(oc); + CpuDefinitionInfoList **cpu_list = user_data; + CpuDefinitionInfo *info; + const char *typename; +@@ -8569,6 +8570,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) + info->name = g_strndup(typename, + strlen(typename) - strlen("-" TYPE_ARM_CPU)); + info->q_typename = g_strdup(typename); ++ info->deprecated = !!cc->deprecation_note; + + QAPI_LIST_PREPEND(*cpu_list, info); + } +-- +2.35.3 + diff --git a/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch b/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch new file mode 100644 index 0000000..d63bfdb --- /dev/null +++ b/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch @@ -0,0 +1,273 @@ +From 577b04770e47aed0f88acb4a415ed04ddbe087f1 Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Thu, 17 Mar 2022 17:59:22 +0000 +Subject: [PATCH 04/18] target/i386: deprecate CPUs older than x86_64-v2 ABI +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [4/6] 71f6043f11b31ffa841a2e14d24972e571c18a9e (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +RHEL-9 is compiled with the x86_64-v2 ABI. We use this as a baseline to +select which CPUs we want to support, such that there is at least one +supported guest CPU that can be launched for every physical machine +capable of running RHEL-9 KVM. + +Supported CPUs: + + * QEMU models + + base (QEMU internal) + host (host passthrough) + max (host passthrough for KVM, + all emulated features for TCG) + + * Intel models + + Icelake-Server + Icelake-Server-noTSX + Cascadelake-Server (2019) + Cascadelake-Server-noTSX (2019) + Skylake-Server (2016) + Skylake-Server-IBRS (2016) + Skylake-Server-noTSX-IBRS (2016) + Skylake-Client (2015) + Skylake-Client-IBRS (2015) + Skylake-Client-noTSX-IBRS (2015) + Broadwell (2014) + Broadwell-IBRS (2014) + Broadwell-noTSX (2014) + Broadwell-noTSX-IBRS (2014) + Haswell (2013) + Haswell-IBRS (2013) + Haswell-noTSX (2013) + Haswell-noTSX-IBRS (2013) + IvyBridge (2012) + IvyBridge-IBRS (2012) + SandyBridge (2011) + SandyBridge-IBRS (2011) + Westmere (2010) + Westmere-IBRS (2010) + Nehalem (2008) + Nehalem-IBRS (2008) + + Cooperlake (2020) + Snowridge (2019) + KnightsMill (2017) + Denverton (2016) + + * AMD models + + EPYC-Milan (2021) + EPYC-Rome (2019) + EPYC (2017) + EPYC-IBPB (2017) + Opteron_G5 (2012) + Opteron_G4 (2011) + + * Other + + Dhyana (2018) + +(I've omitted the many -vNNN versions for brevity) + +Deprecated CPUs: + + 486 + athlon + Conroe + core2duo + coreduo + Icelake-Client 
(already deprecated upstream) + Icelake-Client-noTSX (already deprecated upstream) + kvm32 + kvm64 + n270 + Opteron_G1 + Opteron_G2 + Opteron_G3 + Penryn + pentium2 + pentium3 + pentium + phenom + qemu32 + qemu64 + +The deprecated CPU models are subject to removal in a future +major version of RHEL. + +Note: this has the effect of deprecating the default built-in CPU +model 'qemu64'. Applications using QEMU are expected to make an +explicit choice about which CPU model they want, since no builtin +default can suit all purposes. + +https://bugzilla.redhat.com/show_bug.cgi?id=2060839 +Signed-off-by: Daniel P. Berrangé +--- + target/i386/cpu.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index cb6b5467d0..87cb641b5f 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1780,9 +1780,13 @@ static const CPUCaches epyc_milan_cache_info = { + * PT in VMX operation + */ + ++#define RHEL_CPU_DEPRECATION \ ++ "use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max'" ++ + static const X86CPUDefinition builtin_x86_defs[] = { + { + .name = "qemu64", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 15, +@@ -1803,6 +1807,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "phenom", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 16, +@@ -1835,6 +1840,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "core2duo", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -1877,6 +1883,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "kvm64", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 0xd, + .vendor = CPUID_VENDOR_INTEL, + .family = 15, +@@ -1918,6 +1925,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "qemu32", ++ 
.deprecation_note = RHEL_CPU_DEPRECATION, + .level = 4, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -1932,6 +1940,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "kvm32", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_INTEL, + .family = 15, +@@ -1962,6 +1971,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "coreduo", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -1995,6 +2005,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "486", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 1, + .vendor = CPUID_VENDOR_INTEL, + .family = 4, +@@ -2007,6 +2018,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 1, + .vendor = CPUID_VENDOR_INTEL, + .family = 5, +@@ -2019,6 +2031,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium2", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 2, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2031,6 +2044,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium3", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 3, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2043,6 +2057,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "athlon", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 2, + .vendor = CPUID_VENDOR_AMD, + .family = 6, +@@ -2058,6 +2073,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "n270", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2083,6 +2099,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Conroe", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family 
= 6, +@@ -2123,6 +2140,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Penryn", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -3832,6 +3850,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G1", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 15, +@@ -3852,6 +3871,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G2", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 15, +@@ -3874,6 +3894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G3", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 16, +-- +2.35.3 + diff --git a/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch b/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch new file mode 100644 index 0000000..c940cdb --- /dev/null +++ b/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch @@ -0,0 +1,48 @@ +From 39642d0d37e2ef61ce7fde0bc284d37a365e4482 Mon Sep 17 00:00:00 2001 +From: Murilo Opsfelder Araujo +Date: Mon, 2 May 2022 17:59:11 -0300 +Subject: [PATCH 2/2] target/ppc/cpu-models: Fix ppc_cpu_aliases list for RHEL +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Murilo Opsfelder Araújo +RH-MergeRequest: 81: target/ppc/cpu-models: remove extraneous "#endif" +RH-Commit: [1/1] 5fff003ad3deb84c6a8e69ab90552a31edb3b058 (mopsfelder/centos-stream-src-qemu-kvm) +RH-Bugzilla: 2081022 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier + +The commit b9d28ecdedaf ("Enable/disable devices for RHEL") removed the +"#if 0" from the beginning of the ppc_cpu_aliases list, which broke the +build on ppc64le: + + ../target/ppc/cpu-models.c:904:2: 
error: #endif without #if + #endif + ^ + 1 error generated. + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2081022 + +Fixes: b9d28ecdedaf (Enable/disable devices for RHEL) +Signed-off-by: Murilo Opsfelder Araujo +--- + target/ppc/cpu-models.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c +index dd78883410..528467eac1 100644 +--- a/target/ppc/cpu-models.c ++++ b/target/ppc/cpu-models.c +@@ -746,6 +746,7 @@ + /* PowerPC CPU aliases */ + + PowerPCCPUAlias ppc_cpu_aliases[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "405", "405d4" }, + { "405cr", "405crc" }, + { "405gp", "405gpd" }, +-- +2.35.1 + diff --git a/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch b/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch new file mode 100644 index 0000000..212900d --- /dev/null +++ b/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch @@ -0,0 +1,194 @@ +From 8459c305914e2a7a19dcd1662d54a89def7acfa6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Thu, 17 Mar 2022 17:59:22 +0000 +Subject: [PATCH 05/18] target/s390x: deprecate CPUs older than z14 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [5/6] 2da9e06cf452287673f94f880a7eb8b2b37b7278 (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +RHEL-9 is compiled with the z14 ABI. We use this as a baseline to +select which CPUs we want to support, such that there is at least one +supported guest CPU that can be launched for every physical +machine capable of running RHEL-9 KVM. 
+ +Supported CPUs: + + gen15a-base + gen15a + gen15b-base + gen15b + gen16a-base + gen16a + gen16b-base + gen16b + max + qemu + z14.2-base + z14.2 + z14-base + z14 + z14ZR1-base + z14ZR1 + +Deprecated CPUs: + + z10BC.2-base + z10BC.2 + z10BC-base + z10BC + z10EC.2-base + z10EC.2 + z10EC.3-base + z10EC.3 + z10EC-base + z10EC + z114-base + z114 + z13.2-base + z13.2 + z13-base + z13s-base + z13s + z13 + z196.2-base + z196.2 + z196-base + z196 + z800-base + z800 + z890.2-base + z890.2 + z890.3-base + z890.3 + z890-base + z890 + z900.2-base + z900.2 + z900.3-base + z900.3 + z900-base + z900 + z990.2-base + z990.2 + z990.3-base + z990.3 + z990.4-base + z990.4 + z990.5-base + z990.5 + z990-base + z990 + z9BC.2-base + z9BC.2 + z9BC-base + z9BC + z9EC.2-base + z9EC.2 + z9EC.3-base + z9EC.3 + z9EC-base + z9EC + zBC12-base + zBC12 + zEC12.2-base + zEC12.2 + zEC12-base + zEC12 + +https://bugzilla.redhat.com/show_bug.cgi?id=2060839 +Signed-off-by: Daniel P. Berrangé +--- + target/s390x/cpu_models.c | 11 +++++++++++ + target/s390x/cpu_models.h | 2 ++ + target/s390x/cpu_models_sysemu.c | 2 ++ + 3 files changed, 15 insertions(+) + +diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c +index 6d71428056..9b9fc41676 100644 +--- a/target/s390x/cpu_models.c ++++ b/target/s390x/cpu_models.c +@@ -45,6 +45,9 @@ + * of a following release have been a superset of the previous release. With + * generation 15 one base feature and one optional feature have been deprecated. 
+ */ ++ ++#define RHEL_CPU_DEPRECATION "use at least 'z14', or 'host' / 'qemu' / 'max'" ++ + static S390CPUDef s390_cpu_defs[] = { + CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), + CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), +@@ -852,22 +855,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) + static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) + { + S390CPUClass *xcc = S390_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + /* all base models are migration safe */ + xcc->cpu_def = (const S390CPUDef *) data; + xcc->is_migration_safe = true; + xcc->is_static = true; + xcc->desc = xcc->cpu_def->desc; ++ if (xcc->cpu_def->gen < 14) { ++ cc->deprecation_note = RHEL_CPU_DEPRECATION; ++ } + } + + static void s390_cpu_model_class_init(ObjectClass *oc, void *data) + { + S390CPUClass *xcc = S390_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + /* model that can change between QEMU versions */ + xcc->cpu_def = (const S390CPUDef *) data; + xcc->is_migration_safe = true; + xcc->desc = xcc->cpu_def->desc; ++ if (xcc->cpu_def->gen < 14) { ++ cc->deprecation_note = RHEL_CPU_DEPRECATION; ++ } + } + + static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) +diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h +index 74d1f87e4f..372160bcd7 100644 +--- a/target/s390x/cpu_models.h ++++ b/target/s390x/cpu_models.h +@@ -38,6 +38,8 @@ struct S390CPUDef { + S390FeatBitmap full_feat; + /* used to init full_feat from generated data */ + S390FeatInit full_init; ++ /* if deprecated, provides a suggestion */ ++ const char *deprecation_note; + }; + + /* CPU model based on a CPU definition */ +diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c +index 6a04ccab1b..f3b7c304ec 100644 +--- a/target/s390x/cpu_models_sysemu.c ++++ b/target/s390x/cpu_models_sysemu.c +@@ -61,6 +61,7 @@ static void create_cpu_model_list(ObjectClass 
*klass, void *opaque) + CpuDefinitionInfo *info; + char *name = g_strdup(object_class_get_name(klass)); + S390CPUClass *scc = S390_CPU_CLASS(klass); ++ CPUClass *cc = CPU_CLASS(klass); + + /* strip off the -s390x-cpu */ + g_strrstr(name, "-" TYPE_S390_CPU)[0] = 0; +@@ -70,6 +71,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) + info->migration_safe = scc->is_migration_safe; + info->q_static = scc->is_static; + info->q_typename = g_strdup(object_class_get_name(klass)); ++ info->deprecated = !!cc->deprecation_note; + /* check for unavailable features */ + if (cpu_list_data->model) { + Object *obj; +-- +2.35.3 + diff --git a/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch b/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch new file mode 100644 index 0000000..61752c7 --- /dev/null +++ b/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch @@ -0,0 +1,103 @@ +From 27c1d979a994f5afc59c3520af58d15aa5aae723 Mon Sep 17 00:00:00 2001 +From: Janis Schoetterl-Glausch +Date: Fri, 6 May 2022 17:39:56 +0200 +Subject: [PATCH 29/32] target/s390x: kvm: Honor storage keys during emulation + +RH-Author: Thomas Huth +RH-MergeRequest: 109: Honor storage keys during emulation of I/O instructions +RH-Commit: [2/2] 346dee1e13bfe1c074e4c6a4417091711d852f9c (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2111994 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Claudio Imbrenda + +Storage key controlled protection is currently not honored when +emulating instructions. +If available, enable key protection for the MEM_OP ioctl, thereby +enabling it for the s390_cpu_virt_mem_* functions, when using kvm. +As a result, the emulation of the following instructions honors storage +keys: + +* CLP + The Synch I/O CLP command would need special handling in order + to support storage keys, but is currently not supported. 
+* CHSC + Performing commands asynchronously would require special + handling, but commands are currently always synchronous. +* STSI +* TSCH + Must (and does) not change channel if terminated due to + protection. +* MSCH + Suppressed on protection, works because fetching instruction. +* SSCH + Suppressed on protection, works because fetching instruction. +* STSCH +* STCRW + Suppressed on protection, this works because no partial store is + possible, because the operand cannot span multiple pages. +* PCISTB +* MPCIFC +* STPCIFC + +Signed-off-by: Janis Schoetterl-Glausch +Message-Id: <20220506153956.2217601-3-scgl@linux.ibm.com> +Signed-off-by: Thomas Huth + +(cherry picked from commit 54354861d21b69ec0781f43e67b8d4f6edad7e3f) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2111994 +Signed-off-by: Thomas Huth +--- + target/s390x/kvm/kvm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 74f089d87f..1f1d1a33b8 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -152,12 +152,15 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + static int cap_sync_regs; + static int cap_async_pf; + static int cap_mem_op; ++static int cap_mem_op_extension; + static int cap_s390_irq; + static int cap_ri; + static int cap_hpage_1m; + static int cap_vcpu_resets; + static int cap_protected; + ++static bool mem_op_storage_key_support; ++ + static int active_cmma; + + static int kvm_s390_query_mem_limit(uint64_t *memory_limit) +@@ -355,6 +358,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS); + cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); + cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); ++ cap_mem_op_extension = kvm_check_extension(s, KVM_CAP_S390_MEM_OP_EXTENSION); ++ mem_op_storage_key_support = cap_mem_op_extension > 0; + cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); + 
cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); + cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); +@@ -843,6 +848,7 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, + : KVM_S390_MEMOP_LOGICAL_READ, + .buf = (uint64_t)hostbuf, + .ar = ar, ++ .key = (cpu->env.psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY, + }; + int ret; + +@@ -852,6 +858,9 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, + if (!hostbuf) { + mem_op.flags |= KVM_S390_MEMOP_F_CHECK_ONLY; + } ++ if (mem_op_storage_key_support) { ++ mem_op.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; ++ } + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_S390_MEM_OP, &mem_op); + if (ret < 0) { +-- +2.31.1 + diff --git a/kvm-tests-acpi-SLIC-update-expected-blobs.patch b/kvm-tests-acpi-SLIC-update-expected-blobs.patch new file mode 100644 index 0000000..4d5fc35 --- /dev/null +++ b/kvm-tests-acpi-SLIC-update-expected-blobs.patch @@ -0,0 +1,47 @@ +From 0f5984bd89d481bf2494d4b3c36ef80350f44811 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 12/18] tests: acpi: SLIC: update expected blobs + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [4/10] ca28e5c57f9eb432e5ad6b1cb7ef646a86890dd5 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit c8adb4d222c42951a9d0367e5f5d4e1f5e2c9ad7 +Author: Igor Mammedov +Date: Mon Dec 27 14:31:20 2021 -0500 + + tests: acpi: SLIC: update expected blobs + + Signed-off-by: Igor Mammedov + Message-Id: <20211227193120.1084176-5-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit c8adb4d222c42951a9d0367e5f5d4e1f5e2c9ad7) +Signed-off-by: Jon Maloy +--- + tests/data/acpi/q35/FACP.slic | Bin 244 -> 244 bytes + tests/data/acpi/q35/SLIC.slic | Bin 0 -> 36 bytes + tests/qtest/bios-tables-test-allowed-diff.h | 2 -- + 3 files changed, 2 deletions(-) + +literal 0 +HcmV?d00001 + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index 49dbf8fa3e..dfb8523c8b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1,3 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/q35/FACP.slic", +-"tests/data/acpi/q35/SLIC.slic", +-- +2.27.0 + diff --git a/kvm-tests-acpi-add-SLIC-table-test.patch b/kvm-tests-acpi-add-SLIC-table-test.patch new file mode 100644 index 0000000..9e54a7f --- /dev/null +++ b/kvm-tests-acpi-add-SLIC-table-test.patch @@ -0,0 +1,76 @@ +From 341715473c2a71f11a3888420a0caecf27ed4eb5 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 11/18] tests: acpi: add SLIC table test + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [3/10] baac9b82c16a50eb4640fd7146775c9d507c7b21 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit 11edfabee443b149468a82b5efc88c96d1d259ec +Author: Igor Mammedov +Date: Mon Dec 27 14:31:19 2021 -0500 + + tests: acpi: add SLIC table test + + When user uses '-acpitable' to add SLIC table, some ACPI + tables (FADT) will change its 'Oem ID'/'Oem Table ID' fields to + match that of SLIC. Test makes sure thati QEMU handles + those fields correctly when SLIC table is added with + '-acpitable' option. 
+ + Conflicts: tests/qtest/bios-tables-test.c + due to missing 39d7554b2009 ("tests/acpi: add test case for VIOT") + + Signed-off-by: Igor Mammedov + Message-Id: <20211227193120.1084176-4-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit 11edfabee443b149468a82b5efc88c96d1d259ec) +Signed-off-by: Jon Maloy +--- + tests/qtest/bios-tables-test.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index 16d8304cde..e159b71136 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -1467,6 +1467,20 @@ static void test_acpi_virt_tcg(void) + free_test_data(&data); + } + ++static void test_acpi_q35_slic(void) ++{ ++ test_data data = { ++ .machine = MACHINE_Q35, ++ .variant = ".slic", ++ }; ++ ++ test_acpi_one("-acpitable sig=SLIC,oem_id='CRASH ',oem_table_id='ME'," ++ "oem_rev=00002210,asl_compiler_id='qemu'," ++ "asl_compiler_rev=00000000,data=/dev/null", ++ &data); ++ free_test_data(&data); ++} ++ + static void test_oem_fields(test_data *data) + { + int i; +@@ -1641,6 +1655,7 @@ int main(int argc, char *argv[]) + qtest_add_func("acpi/q35/kvm/xapic", test_acpi_q35_kvm_xapic); + qtest_add_func("acpi/q35/kvm/dmar", test_acpi_q35_kvm_dmar); + } ++ qtest_add_func("acpi/q35/slic", test_acpi_q35_slic); + } else if (strcmp(arch, "aarch64") == 0) { + if (has_tcg) { + qtest_add_func("acpi/virt", test_acpi_virt_tcg); +-- +2.27.0 + diff --git a/kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch b/kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch new file mode 100644 index 0000000..05a6838 --- /dev/null +++ b/kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch @@ -0,0 +1,84 @@ +From d94b3278c84cf7451489631d804a6b5cbd28a59d Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 13/18] tests: acpi: manually pad 
OEM_ID/OEM_TABLE_ID for + test_oem_fields() test + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [5/10] 4ec8c738acec178c2f005f189b0c2a77a7af4088 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit a849522f726767022203ef2b6c395ea19facb866 +Author: Igor Mammedov +Date: Wed Jan 12 08:03:29 2022 -0500 + + tests: acpi: manually pad OEM_ID/OEM_TABLE_ID for test_oem_fields() test + + The next commit will revert OEM fields padding with whitespace to + padding with '\0' as it was before [1]. As result test_oem_fields() will + fail due to unexpectedly smaller ID sizes read from QEMU ACPI tables. + + Pad OEM_ID/OEM_TABLE_ID manually with spaces so that values the test + puts on QEMU CLI and expected values match. + + 1) 602b458201 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + Signed-off-by: Igor Mammedov + Message-Id: <20220112130332.1648664-2-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit a849522f726767022203ef2b6c395ea19facb866) +Signed-off-by: Jon Maloy +--- + tests/qtest/bios-tables-test.c | 15 ++++++--------- + 1 file changed, 6 insertions(+), 9 deletions(-) + +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index e159b71136..348fdbd202 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -71,9 +71,10 @@ + + #define ACPI_REBUILD_EXPECTED_AML "TEST_ACPI_REBUILD_AML" + +-#define OEM_ID "TEST" +-#define OEM_TABLE_ID "OEM" +-#define OEM_TEST_ARGS "-machine x-oem-id="OEM_ID",x-oem-table-id="OEM_TABLE_ID ++#define OEM_ID "TEST " ++#define OEM_TABLE_ID "OEM " ++#define OEM_TEST_ARGS "-machine x-oem-id='" OEM_ID "',x-oem-table-id='" \ ++ OEM_TABLE_ID "'" + + typedef struct { + bool tcg_only; +@@ -1484,11 +1485,7 @@ static void test_acpi_q35_slic(void) + static void test_oem_fields(test_data *data) + { + int i; +- char oem_id[6]; +- char oem_table_id[8]; + +- strpadcpy(oem_id, sizeof oem_id, OEM_ID, ' '); +- strpadcpy(oem_table_id, sizeof oem_table_id, OEM_TABLE_ID, ' '); + for (i = 0; i < data->tables->len; ++i) { + AcpiSdtTable *sdt; + +@@ -1498,8 +1495,8 @@ static void test_oem_fields(test_data *data) + continue; + } + +- g_assert(memcmp(sdt->aml + 10, oem_id, 6) == 0); +- g_assert(memcmp(sdt->aml + 16, oem_table_id, 8) == 0); ++ g_assert(memcmp(sdt->aml + 10, OEM_ID, 6) == 0); ++ g_assert(memcmp(sdt->aml + 16, OEM_TABLE_ID, 8) == 0); + } + } + +-- +2.27.0 + diff --git a/kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch b/kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch new file mode 100644 index 0000000..66d62e5 --- /dev/null +++ b/kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch @@ -0,0 +1,77 @@ +From 485bf2eb8edabd4553d995d5e32224df1e510aa2 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 17/18] tests: acpi: test short OEM_ID/OEM_TABLE_ID values in + 
test_oem_fields() + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [9/10] 31339223fb6c6cc32185b9fdaac76f2709b17ad6 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit 408ca92634770de5eac7965ed97c6260e770f2e7 +Author: Igor Mammedov +Date: Fri Jan 14 09:26:41 2022 -0500 + + tests: acpi: test short OEM_ID/OEM_TABLE_ID values in test_oem_fields() + + Previous patch [1] added explicit whitespace padding to OEM_ID/OEM_TABLE_ID + values used in test_oem_fields() testcase to avoid false positive and + bisection issues when QEMU is switched to \0' padding. As result + testcase ceased to test values that were shorter than max possible + length values. + + Update testcase to make sure that it's testing shorter IDs like it + used to before [2]. + + 1) "tests: acpi: manually pad OEM_ID/OEM_TABLE_ID for test_oem_fields() test" + 2) 602b458201 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + + Signed-off-by: Igor Mammedov + Message-Id: <20220114142641.1727679-1-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit 408ca92634770de5eac7965ed97c6260e770f2e7) +Signed-off-by: Jon Maloy +--- + tests/qtest/bios-tables-test.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index 348fdbd202..515a647490 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -71,10 +71,10 @@ + + #define ACPI_REBUILD_EXPECTED_AML "TEST_ACPI_REBUILD_AML" + +-#define OEM_ID "TEST " +-#define OEM_TABLE_ID "OEM " +-#define OEM_TEST_ARGS "-machine x-oem-id='" OEM_ID "',x-oem-table-id='" \ +- OEM_TABLE_ID "'" ++#define OEM_ID "TEST" ++#define OEM_TABLE_ID "OEM" ++#define OEM_TEST_ARGS "-machine x-oem-id=" OEM_ID ",x-oem-table-id=" \ ++ OEM_TABLE_ID + + typedef struct { + bool tcg_only; +@@ -1495,8 +1495,8 @@ static void test_oem_fields(test_data *data) + continue; + } + +- g_assert(memcmp(sdt->aml + 10, OEM_ID, 6) == 0); +- g_assert(memcmp(sdt->aml + 16, OEM_TABLE_ID, 8) == 0); ++ g_assert(strncmp((char *)sdt->aml + 10, OEM_ID, 6) == 0); ++ g_assert(strncmp((char *)sdt->aml + 16, OEM_TABLE_ID, 8) == 0); + } + } + +-- +2.27.0 + diff --git a/kvm-tests-acpi-update-expected-blobs.patch b/kvm-tests-acpi-update-expected-blobs.patch new file mode 100644 index 0000000..8f300c4 --- /dev/null +++ b/kvm-tests-acpi-update-expected-blobs.patch @@ -0,0 +1,58 @@ +From 4785d2a77fbea681975e5c48ae6a1be49058e089 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 16/18] tests: acpi: update expected blobs + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [8/10] e069c5de88f34393d65d32b60380865832820302 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit 5adc3aba875416b0e077d8a29ddd0357883746f4 +Author: Igor Mammedov +Date: Wed Jan 12 08:03:32 2022 
-0500 + + tests: acpi: update expected blobs + + Expected changes caused by previous commit: + + nvdimm ssdt (q35/pc/virt): + - * OEM Table ID "NVDIMM " + + * OEM Table ID "NVDIMM" + + SLIC test FADT (tests/data/acpi/q35/FACP.slic): + -[010h 0016 8] Oem Table ID : "ME " + +[010h 0016 8] Oem Table ID : "ME" + + Signed-off-by: Igor Mammedov + Message-Id: <20220112130332.1648664-5-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit 5adc3aba875416b0e077d8a29ddd0357883746f4) +Signed-off-by: Jon Maloy +--- + tests/data/acpi/pc/SSDT.dimmpxm | Bin 734 -> 734 bytes + tests/data/acpi/q35/FACP.slic | Bin 244 -> 244 bytes + tests/data/acpi/q35/SSDT.dimmpxm | Bin 734 -> 734 bytes + tests/data/acpi/virt/SSDT.memhp | Bin 736 -> 736 bytes + tests/qtest/bios-tables-test-allowed-diff.h | 4 ---- + 5 files changed, 4 deletions(-) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index 7faa8f53be..dfb8523c8b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1,5 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/virt/SSDT.memhp", +-"tests/data/acpi/pc/SSDT.dimmpxm", +-"tests/data/acpi/q35/SSDT.dimmpxm", +-"tests/data/acpi/q35/FACP.slic", +-- +2.27.0 + diff --git a/kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch b/kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch new file mode 100644 index 0000000..4a1b350 --- /dev/null +++ b/kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch @@ -0,0 +1,47 @@ +From 4e6482073df85db5982aa03ab0355e632b7157fc Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 10/18] tests: acpi: whitelist expected blobs before changing + them + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [2/10] 
c664ecad30ca9c13025a63bb31ae7b80fd63e4df (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit e71f6ab9d93a7d01e833647e7010c1079c4cef30 +Author: Igor Mammedov +Date: Mon Dec 27 14:31:18 2021 -0500 + + tests: acpi: whitelist expected blobs before changing them + + Signed-off-by: Igor Mammedov + Message-Id: <20211227193120.1084176-3-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit e71f6ab9d93a7d01e833647e7010c1079c4cef30) +Signed-off-by: Jon Maloy +--- + tests/data/acpi/q35/FACP.slic | Bin 0 -> 244 bytes + tests/data/acpi/q35/SLIC.slic | 0 + tests/qtest/bios-tables-test-allowed-diff.h | 2 ++ + 3 files changed, 2 insertions(+) + create mode 100644 tests/data/acpi/q35/FACP.slic + create mode 100644 tests/data/acpi/q35/SLIC.slic + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523c8b..49dbf8fa3e 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,3 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/q35/FACP.slic", ++"tests/data/acpi/q35/SLIC.slic", +-- +2.27.0 + diff --git a/kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch b/kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch new file mode 100644 index 0000000..30289c7 --- /dev/null +++ b/kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch @@ -0,0 +1,57 @@ +From a132a22e316121cf00ff733afb1ad1dc313e14b3 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 14/18] tests: acpi: whitelist nvdimm's SSDT and FACP.slic + expected blobs + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [6/10] 3f3a929cde82f228da1e4bc66e4c869467c0289c (jmaloy/qemu-kvm) 
+RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit d1e4a4654154925eddf0fc449fa9c92b806b9c8c +Author: Igor Mammedov +Date: Wed Jan 12 08:03:30 2022 -0500 + + tests: acpi: whitelist nvdimm's SSDT and FACP.slic expected blobs + + The next commit will revert OEM fields whitespace padding to + padding with '\0' as it was before [1]. That will change OEM + Table ID for: + * SSDT.*: where it was padded from 6 characters to 8 + * FACP.slic: where it was padded from 2 characters to 8 + after reverting whitespace padding, it will be replaced with + '\0' which effectively will shorten OEM table ID to 6 and 2 + characters. + + Whitelist affected tables before introducing the change. + + 1) 602b458201 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + Signed-off-by: Igor Mammedov + Message-Id: <20220112130332.1648664-3-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit d1e4a4654154925eddf0fc449fa9c92b806b9c8c) +Signed-off-by: Jon Maloy +--- + tests/qtest/bios-tables-test-allowed-diff.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523c8b..7faa8f53be 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,5 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/virt/SSDT.memhp", ++"tests/data/acpi/pc/SSDT.dimmpxm", ++"tests/data/acpi/q35/SSDT.dimmpxm", ++"tests/data/acpi/q35/FACP.slic", +-- +2.27.0 + diff --git a/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch b/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch new file mode 100644 index 0000000..4fcf786 --- /dev/null +++ b/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch @@ -0,0 +1,157 @@ +From f52aa60217634c96fef59ce76b803a94610bf5c8 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Wed, 15 Jun 2022 15:28:27 +0200 +Subject: [PATCH 01/18] tests/avocado: update aarch64_virt test to exercise + -cpu max +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [1/6] df6839e567180a4c32afd98852f68b2279e00f7c (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 + +commit 11593544df6f8febb3ce87015c22b429bf43c4c7 +Author: Alex Bennée +Date: Tue Apr 19 10:09:56 2022 +0100 + + tests/avocado: update aarch64_virt test to exercise -cpu max + + The Fedora 29 kernel is quite old and importantly fails when running + in LPA2 scenarios. 
As it's not really exercising much of the CPU space + replace it with a custom 5.16.12 kernel with all the architecture + options turned on. There is a minimal buildroot initramfs included in + the kernel which has a few tools for stress testing the memory + subsystem. The userspace also targets the Neoverse N1 processor so + would fail with a v8.0 cpu like cortex-a53. + + While we are at it move the test into its own file so it can have an + assigned maintainer. + + Signed-off-by: Alex Bennée + Acked-by: Richard Henderson + Tested-by: Richard Henderson + Message-Id: <20220419091020.3008144-2-alex.bennee@linaro.org> + +Signed-off-by: Andrew Jones +--- + MAINTAINERS | 1 + + tests/avocado/boot_linux_console.py | 25 ------------- + tests/avocado/machine_aarch64_virt.py | 51 +++++++++++++++++++++++++++ + 3 files changed, 52 insertions(+), 25 deletions(-) + create mode 100644 tests/avocado/machine_aarch64_virt.py + +diff --git a/MAINTAINERS b/MAINTAINERS +index 2fe20a49ab..bfe8806f60 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -942,6 +942,7 @@ S: Maintained + F: hw/arm/virt* + F: include/hw/arm/virt.h + F: docs/system/arm/virt.rst ++F: tests/avocado/machine_aarch64_virt.py + + Xilinx Zynq + M: Edgar E. 
Iglesias +diff --git a/tests/avocado/boot_linux_console.py b/tests/avocado/boot_linux_console.py +index b40a3abc81..45a2ceda22 100644 +--- a/tests/avocado/boot_linux_console.py ++++ b/tests/avocado/boot_linux_console.py +@@ -325,31 +325,6 @@ def test_mips_malta32el_nanomips_64k_dbg(self): + kernel_hash = '18d1c68f2e23429e266ca39ba5349ccd0aeb7180' + self.do_test_mips_malta32el_nanomips(kernel_url, kernel_hash) + +- def test_aarch64_virt(self): +- """ +- :avocado: tags=arch:aarch64 +- :avocado: tags=machine:virt +- :avocado: tags=accel:tcg +- :avocado: tags=cpu:cortex-a53 +- """ +- kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' +- '/linux/releases/29/Everything/aarch64/os/images/pxeboot' +- '/vmlinuz') +- kernel_hash = '8c73e469fc6ea06a58dc83a628fc695b693b8493' +- kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) +- +- self.vm.set_console() +- kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +- 'console=ttyAMA0') +- self.require_accelerator("tcg") +- self.vm.add_args('-cpu', 'cortex-a53', +- '-accel', 'tcg', +- '-kernel', kernel_path, +- '-append', kernel_command_line) +- self.vm.launch() +- console_pattern = 'Kernel command line: %s' % kernel_command_line +- self.wait_for_console_pattern(console_pattern) +- + def test_aarch64_xlnx_versal_virt(self): + """ + :avocado: tags=arch:aarch64 +diff --git a/tests/avocado/machine_aarch64_virt.py b/tests/avocado/machine_aarch64_virt.py +new file mode 100644 +index 0000000000..21848cba70 +--- /dev/null ++++ b/tests/avocado/machine_aarch64_virt.py +@@ -0,0 +1,51 @@ ++# Functional test that boots a Linux kernel and checks the console ++# ++# Copyright (c) 2022 Linaro Ltd. 
++# ++# Author: ++# Alex Bennée ++# ++# SPDX-License-Identifier: GPL-2.0-or-later ++ ++import time ++ ++from avocado_qemu import QemuSystemTest ++from avocado_qemu import wait_for_console_pattern ++from avocado_qemu import exec_command ++ ++class Aarch64VirtMachine(QemuSystemTest): ++ KERNEL_COMMON_COMMAND_LINE = 'printk.time=0 ' ++ ++ def wait_for_console_pattern(self, success_message, vm=None): ++ wait_for_console_pattern(self, success_message, ++ failure_message='Kernel panic - not syncing', ++ vm=vm) ++ ++ def test_aarch64_virt(self): ++ """ ++ :avocado: tags=arch:aarch64 ++ :avocado: tags=machine:virt ++ :avocado: tags=accel:tcg ++ :avocado: tags=cpu:max ++ """ ++ kernel_url = ('https://fileserver.linaro.org/s/' ++ 'z6B2ARM7DQT3HWN/download') ++ ++ kernel_hash = 'ed11daab50c151dde0e1e9c9cb8b2d9bd3215347' ++ kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) ++ ++ self.vm.set_console() ++ kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + ++ 'console=ttyAMA0') ++ self.require_accelerator("tcg") ++ self.vm.add_args('-cpu', 'max,pauth-impdef=on', ++ '-accel', 'tcg', ++ '-kernel', kernel_path, ++ '-append', kernel_command_line) ++ self.vm.launch() ++ self.wait_for_console_pattern('Welcome to Buildroot') ++ time.sleep(0.1) ++ exec_command(self, 'root') ++ time.sleep(0.1) ++ exec_command(self, 'cat /proc/self/maps') ++ time.sleep(0.1) +-- +2.35.3 + diff --git a/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch b/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch new file mode 100644 index 0000000..7b9a8f3 --- /dev/null +++ b/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch @@ -0,0 +1,119 @@ +From cea7b15c613a11ea15a1458d6990be7044df6643 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 18 Nov 2021 12:57:33 +0100 +Subject: [PATCH 17/17] tests/qtest/fdc-test: Add a regression test for + CVE-2021-3507 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 107: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) +RH-Commit: [2/2] 067c052df790959c28c1fcc16547676d36523bd9 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1951522 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Miroslav Rezanina + +Add the reproducer from https://gitlab.com/qemu-project/qemu/-/issues/339 + +Without the previous commit, when running 'make check-qtest-i386' +with QEMU configured with '--enable-sanitizers' we get: + + ==4028352==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x619000062a00 at pc 0x5626d03c491a bp 0x7ffdb4199410 sp 0x7ffdb4198bc0 + READ of size 786432 at 0x619000062a00 thread T0 + #0 0x5626d03c4919 in __asan_memcpy (qemu-system-i386+0x1e65919) + #1 0x5626d1c023cc in flatview_write_continue softmmu/physmem.c:2787:13 + #2 0x5626d1bf0c0f in flatview_write softmmu/physmem.c:2822:14 + #3 0x5626d1bf0798 in address_space_write softmmu/physmem.c:2914:18 + #4 0x5626d1bf0f37 in address_space_rw softmmu/physmem.c:2924:16 + #5 0x5626d1bf14c8 in cpu_physical_memory_rw softmmu/physmem.c:2933:5 + #6 0x5626d0bd5649 in cpu_physical_memory_write include/exec/cpu-common.h:82:5 + #7 0x5626d0bd0a07 in i8257_dma_write_memory hw/dma/i8257.c:452:9 + #8 0x5626d09f825d in fdctrl_transfer_handler hw/block/fdc.c:1616:13 + #9 0x5626d0a048b4 in fdctrl_start_transfer hw/block/fdc.c:1539:13 + #10 0x5626d09f4c3e in fdctrl_write_data hw/block/fdc.c:2266:13 + #11 0x5626d09f22f7 in fdctrl_write hw/block/fdc.c:829:9 + #12 0x5626d1c20bc5 in portio_write softmmu/ioport.c:207:17 + + 0x619000062a00 is located 0 bytes to the right of 512-byte region [0x619000062800,0x619000062a00) + allocated by thread T0 here: + #0 0x5626d03c66ec in posix_memalign (qemu-system-i386+0x1e676ec) + #1 0x5626d2b988d4 in qemu_try_memalign util/oslib-posix.c:210:11 + #2 0x5626d2b98b0c in qemu_memalign util/oslib-posix.c:226:27 + #3 0x5626d09fbaf0 in fdctrl_realize_common hw/block/fdc.c:2341:20 + #4 
0x5626d0a150ed in isabus_fdc_realize hw/block/fdc-isa.c:113:5 + #5 0x5626d2367935 in device_set_realized hw/core/qdev.c:531:13 + + SUMMARY: AddressSanitizer: heap-buffer-overflow (qemu-system-i386+0x1e65919) in __asan_memcpy + Shadow bytes around the buggy address: + 0x0c32800044f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x0c3280004510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x0c3280004520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x0c3280004530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + =>0x0c3280004540:[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004550: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004560: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004570: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004580: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004590: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd + Shadow byte legend (one shadow byte represents 8 application bytes): + Addressable: 00 + Heap left redzone: fa + Freed heap region: fd + ==4028352==ABORTING + +[ kwolf: Added snapshot=on to prevent write file lock failure ] + +Reported-by: Alexander Bulekov +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Alexander Bulekov +Signed-off-by: Kevin Wolf +(cherry picked from commit 46609b90d9e3a6304def11038a76b58ff43f77bc) +Signed-off-by: Jon Maloy +--- + tests/qtest/fdc-test.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c +index b0d40012e6..1d4f852128 100644 +--- a/tests/qtest/fdc-test.c ++++ b/tests/qtest/fdc-test.c +@@ -583,6 +583,26 @@ static void test_cve_2021_20196(void) + qtest_quit(s); + } + ++static void test_cve_2021_3507(void) ++{ ++ QTestState *s; ++ ++ s = qtest_initf("-nographic -m 32M -nodefaults " ++ "-drive file=%s,format=raw,if=floppy,snapshot=on", ++ test_image); ++ qtest_outl(s, 0x9, 
0x0a0206); ++ qtest_outw(s, 0x3f4, 0x1600); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0200); ++ qtest_outw(s, 0x3f4, 0x0200); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_quit(s); ++} ++ + int main(int argc, char **argv) + { + int fd; +@@ -614,6 +634,7 @@ int main(int argc, char **argv) + qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19); + qtest_add_func("/fdc/fuzz-registers", fuzz_registers); + qtest_add_func("/fdc/fuzz/cve_2021_20196", test_cve_2021_20196); ++ qtest_add_func("/fdc/fuzz/cve_2021_3507", test_cve_2021_3507); + + ret = g_test_run(); + +-- +2.31.1 + diff --git a/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch b/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch new file mode 100644 index 0000000..77929a6 --- /dev/null +++ b/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch @@ -0,0 +1,385 @@ +From 7a6fa42d4a4263c94b9bf18290f9e7680ea9e7f4 Mon Sep 17 00:00:00 2001 +From: Nicolas Saenz Julienne +Date: Mon, 25 Apr 2022 09:57:23 +0200 +Subject: [PATCH 03/16] util/event-loop-base: Introduce options to set the + thread pool size + +RH-Author: Nicolas Saenz Julienne +RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size +RH-Commit: [3/3] af78a88ff3c69701cbb5f9e980c3d6ebbd13ff98 +RH-Bugzilla: 2031024 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +The thread pool regulates itself: when idle, it kills threads until +empty, when in demand, it creates new threads until full. This behaviour +doesn't play well with latency sensitive workloads where the price of +creating a new thread is too high. For example, when paired with qemu's +'-mlock', or using safety features like SafeStack, creating a new thread +has been measured take multiple milliseconds. 
+ +In order to mitigate this let's introduce a new 'EventLoopBase' +property to set the thread pool size. The threads will be created during +the pool's initialization or upon updating the property's value, remain +available during its lifetime regardless of demand, and destroyed upon +freeing it. A properly characterized workload will then be able to +configure the pool to avoid any latency spikes. + +Signed-off-by: Nicolas Saenz Julienne +Reviewed-by: Stefan Hajnoczi +Acked-by: Markus Armbruster +Message-id: 20220425075723.20019-4-nsaenzju@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 71ad4713cc1d7fca24388b828ef31ae6cb38a31c) +--- + event-loop-base.c | 23 +++++++++++++ + include/block/aio.h | 10 ++++++ + include/block/thread-pool.h | 3 ++ + include/sysemu/event-loop-base.h | 4 +++ + iothread.c | 3 ++ + qapi/qom.json | 10 +++++- + util/aio-posix.c | 1 + + util/async.c | 20 ++++++++++++ + util/main-loop.c | 9 ++++++ + util/thread-pool.c | 55 +++++++++++++++++++++++++++++--- + 10 files changed, 133 insertions(+), 5 deletions(-) + +diff --git a/event-loop-base.c b/event-loop-base.c +index e7f99a6ec8..d5be4dc6fc 100644 +--- a/event-loop-base.c ++++ b/event-loop-base.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qom/object_interfaces.h" + #include "qapi/error.h" ++#include "block/thread-pool.h" + #include "sysemu/event-loop-base.h" + + typedef struct { +@@ -21,9 +22,22 @@ typedef struct { + ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */ + } EventLoopBaseParamInfo; + ++static void event_loop_base_instance_init(Object *obj) ++{ ++ EventLoopBase *base = EVENT_LOOP_BASE(obj); ++ ++ base->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; ++} ++ + static EventLoopBaseParamInfo aio_max_batch_info = { + "aio-max-batch", offsetof(EventLoopBase, aio_max_batch), + }; ++static EventLoopBaseParamInfo thread_pool_min_info = { ++ "thread-pool-min", offsetof(EventLoopBase, thread_pool_min), ++}; ++static 
EventLoopBaseParamInfo thread_pool_max_info = { ++ "thread-pool-max", offsetof(EventLoopBase, thread_pool_max), ++}; + + static void event_loop_base_get_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +@@ -95,12 +109,21 @@ static void event_loop_base_class_init(ObjectClass *klass, void *class_data) + event_loop_base_get_param, + event_loop_base_set_param, + NULL, &aio_max_batch_info); ++ object_class_property_add(klass, "thread-pool-min", "int", ++ event_loop_base_get_param, ++ event_loop_base_set_param, ++ NULL, &thread_pool_min_info); ++ object_class_property_add(klass, "thread-pool-max", "int", ++ event_loop_base_get_param, ++ event_loop_base_set_param, ++ NULL, &thread_pool_max_info); + } + + static const TypeInfo event_loop_base_info = { + .name = TYPE_EVENT_LOOP_BASE, + .parent = TYPE_OBJECT, + .instance_size = sizeof(EventLoopBase), ++ .instance_init = event_loop_base_instance_init, + .class_size = sizeof(EventLoopBaseClass), + .class_init = event_loop_base_class_init, + .abstract = true, +diff --git a/include/block/aio.h b/include/block/aio.h +index 5634173b12..d128558f1d 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -192,6 +192,8 @@ struct AioContext { + QSLIST_HEAD(, Coroutine) scheduled_coroutines; + QEMUBH *co_schedule_bh; + ++ int thread_pool_min; ++ int thread_pool_max; + /* Thread pool for performing work and receiving completion callbacks. + * Has its own locking. 
+ */ +@@ -769,4 +771,12 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, + void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, + Error **errp); + ++/** ++ * aio_context_set_thread_pool_params: ++ * @ctx: the aio context ++ * @min: min number of threads to have readily available in the thread pool ++ * @max: max number of threads the thread pool can contain ++ */ ++void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min, ++ int64_t max, Error **errp); + #endif +diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h +index 7dd7d730a0..2020bcc92d 100644 +--- a/include/block/thread-pool.h ++++ b/include/block/thread-pool.h +@@ -20,6 +20,8 @@ + + #include "block/block.h" + ++#define THREAD_POOL_MAX_THREADS_DEFAULT 64 ++ + typedef int ThreadPoolFunc(void *opaque); + + typedef struct ThreadPool ThreadPool; +@@ -33,5 +35,6 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool, + int coroutine_fn thread_pool_submit_co(ThreadPool *pool, + ThreadPoolFunc *func, void *arg); + void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg); ++void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx); + + #endif +diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h +index fced4c9fea..2748bf6ae1 100644 +--- a/include/sysemu/event-loop-base.h ++++ b/include/sysemu/event-loop-base.h +@@ -33,5 +33,9 @@ struct EventLoopBase { + + /* AioContext AIO engine parameters */ + int64_t aio_max_batch; ++ ++ /* AioContext thread pool parameters */ ++ int64_t thread_pool_min; ++ int64_t thread_pool_max; + }; + #endif +diff --git a/iothread.c b/iothread.c +index 8fa2f3bfb8..529194a566 100644 +--- a/iothread.c ++++ b/iothread.c +@@ -174,6 +174,9 @@ static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp) + aio_context_set_aio_params(iothread->ctx, + iothread->parent_obj.aio_max_batch, + errp); ++ ++ aio_context_set_thread_pool_params(iothread->ctx,
base->thread_pool_min, ++ base->thread_pool_max, errp); + } + + +diff --git a/qapi/qom.json b/qapi/qom.json +index 7d4a2ac1b9..6a653c6636 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -508,10 +508,18 @@ + # 0 means that the engine will use its default. + # (default: 0) + # ++# @thread-pool-min: minimum number of threads reserved in the thread pool ++# (default:0) ++# ++# @thread-pool-max: maximum number of threads the thread pool can contain ++# (default:64) ++# + # Since: 7.1 + ## + { 'struct': 'EventLoopBaseProperties', +- 'data': { '*aio-max-batch': 'int' } } ++ 'data': { '*aio-max-batch': 'int', ++ '*thread-pool-min': 'int', ++ '*thread-pool-max': 'int' } } + + ## + # @IothreadProperties: +diff --git a/util/aio-posix.c b/util/aio-posix.c +index be0182a3c6..731f3826c0 100644 +--- a/util/aio-posix.c ++++ b/util/aio-posix.c +@@ -15,6 +15,7 @@ + + #include "qemu/osdep.h" + #include "block/block.h" ++#include "block/thread-pool.h" + #include "qemu/main-loop.h" + #include "qemu/rcu.h" + #include "qemu/rcu_queue.h" +diff --git a/util/async.c b/util/async.c +index 2ea1172f3e..554ba70cca 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -563,6 +563,9 @@ AioContext *aio_context_new(Error **errp) + + ctx->aio_max_batch = 0; + ++ ctx->thread_pool_min = 0; ++ ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; ++ + return ctx; + fail: + g_source_destroy(&ctx->source); +@@ -696,3 +699,20 @@ void qemu_set_current_aio_context(AioContext *ctx) + assert(!get_my_aiocontext()); + set_my_aiocontext(ctx); + } ++ ++void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min, ++ int64_t max, Error **errp) ++{ ++ ++ if (min > max || !max || min > INT_MAX || max > INT_MAX) { ++ error_setg(errp, "bad thread-pool-min/thread-pool-max values"); ++ return; ++ } ++ ++ ctx->thread_pool_min = min; ++ ctx->thread_pool_max = max; ++ ++ if (ctx->thread_pool) { ++ thread_pool_update_params(ctx->thread_pool, ctx); ++ } ++} +diff --git a/util/main-loop.c b/util/main-loop.c +index 
5b13f456fa..a0f48186ab 100644 +--- a/util/main-loop.c ++++ b/util/main-loop.c +@@ -30,6 +30,7 @@ + #include "sysemu/replay.h" + #include "qemu/main-loop.h" + #include "block/aio.h" ++#include "block/thread-pool.h" + #include "qemu/error-report.h" + #include "qemu/queue.h" + #include "qemu/compiler.h" +@@ -187,12 +188,20 @@ int qemu_init_main_loop(Error **errp) + + static void main_loop_update_params(EventLoopBase *base, Error **errp) + { ++ ERRP_GUARD(); ++ + if (!qemu_aio_context) { + error_setg(errp, "qemu aio context not ready"); + return; + } + + aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp); ++ if (*errp) { ++ return; ++ } ++ ++ aio_context_set_thread_pool_params(qemu_aio_context, base->thread_pool_min, ++ base->thread_pool_max, errp); + } + + MainLoop *mloop; +diff --git a/util/thread-pool.c b/util/thread-pool.c +index d763cea505..196835b4d3 100644 +--- a/util/thread-pool.c ++++ b/util/thread-pool.c +@@ -58,7 +58,6 @@ struct ThreadPool { + QemuMutex lock; + QemuCond worker_stopped; + QemuSemaphore sem; +- int max_threads; + QEMUBH *new_thread_bh; + + /* The following variables are only accessed from one AioContext. */ +@@ -71,8 +70,27 @@ struct ThreadPool { + int new_threads; /* backlog of threads we need to create */ + int pending_threads; /* threads created but not running yet */ + bool stopping; ++ int min_threads; ++ int max_threads; + }; + ++static inline bool back_to_sleep(ThreadPool *pool, int ret) ++{ ++ /* ++ * The semaphore timed out, we should exit the loop except when: ++ * - There is work to do, we raced with the signal. ++ * - The max threads threshold just changed, we raced with the signal. ++ * - The thread pool forces a minimum number of readily available threads. 
++ */ ++ if (ret == -1 && (!QTAILQ_EMPTY(&pool->request_list) || ++ pool->cur_threads > pool->max_threads || ++ pool->cur_threads <= pool->min_threads)) { ++ return true; ++ } ++ ++ return false; ++} ++ + static void *worker_thread(void *opaque) + { + ThreadPool *pool = opaque; +@@ -91,8 +109,9 @@ static void *worker_thread(void *opaque) + ret = qemu_sem_timedwait(&pool->sem, 10000); + qemu_mutex_lock(&pool->lock); + pool->idle_threads--; +- } while (ret == -1 && !QTAILQ_EMPTY(&pool->request_list)); +- if (ret == -1 || pool->stopping) { ++ } while (back_to_sleep(pool, ret)); ++ if (ret == -1 || pool->stopping || ++ pool->cur_threads > pool->max_threads) { + break; + } + +@@ -294,6 +313,33 @@ void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg) + thread_pool_submit_aio(pool, func, arg, NULL, NULL); + } + ++void thread_pool_update_params(ThreadPool *pool, AioContext *ctx) ++{ ++ qemu_mutex_lock(&pool->lock); ++ ++ pool->min_threads = ctx->thread_pool_min; ++ pool->max_threads = ctx->thread_pool_max; ++ ++ /* ++ * We either have to: ++ * - Increase the number of available threads until over the min_threads ++ * threshold. ++ * - Decrease the number of available threads until under the max_threads ++ * threshold. ++ * - Do nothing. The current number of threads falls in between the min and ++ * max thresholds. We'll let the pool manage itself.
++ */ ++ for (int i = pool->cur_threads; i < pool->min_threads; i++) { ++ spawn_thread(pool); ++ } ++ ++ for (int i = pool->cur_threads; i > pool->max_threads; i--) { ++ qemu_sem_post(&pool->sem); ++ } ++ ++ qemu_mutex_unlock(&pool->lock); ++} ++ + static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) + { + if (!ctx) { +@@ -306,11 +352,12 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) + qemu_mutex_init(&pool->lock); + qemu_cond_init(&pool->worker_stopped); + qemu_sem_init(&pool->sem, 0); +- pool->max_threads = 64; + pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool); + + QLIST_INIT(&pool->head); + QTAILQ_INIT(&pool->request_list); ++ ++ thread_pool_update_params(pool, ctx); + } + + ThreadPool *thread_pool_new(AioContext *ctx) +-- +2.31.1 + diff --git a/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch b/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch new file mode 100644 index 0000000..2104424 --- /dev/null +++ b/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch @@ -0,0 +1,233 @@ +From b4969662de01848f887a3918e97e516efc213f71 Mon Sep 17 00:00:00 2001 +From: Nicolas Saenz Julienne +Date: Mon, 25 Apr 2022 09:57:22 +0200 +Subject: [PATCH 02/16] util/main-loop: Introduce the main loop into QOM + +RH-Author: Nicolas Saenz Julienne +RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size +RH-Commit: [2/3] a481b77e25ad50d13dcbe26b36c551b18c89bddd +RH-Bugzilla: 2031024 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +'event-loop-base' provides basic property handling for all 'AioContext' +based event loops. So let's define a new 'MainLoopClass' that inherits +from it. This will permit tweaking the main loop's properties through +qapi as well as through the command line using the '-object' keyword[1]. +Only one instance of 'MainLoopClass' might be created at any time. 
+ +'EventLoopBaseClass' learns a new callback, 'can_be_deleted()' so as to +mark 'MainLoop' as non-deletable. + +[1] For example: + -object main-loop,id=main-loop,aio-max-batch= + +Signed-off-by: Nicolas Saenz Julienne +Reviewed-by: Stefan Hajnoczi +Acked-by: Markus Armbruster +Message-id: 20220425075723.20019-3-nsaenzju@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 70ac26b9e5ca8374bb3ef3f30b871726673c9f27) +--- + event-loop-base.c | 13 ++++++++ + include/qemu/main-loop.h | 10 ++++++ + include/sysemu/event-loop-base.h | 1 + + meson.build | 3 +- + qapi/qom.json | 13 ++++++++ + util/main-loop.c | 56 ++++++++++++++++++++++++++++++++ + 6 files changed, 95 insertions(+), 1 deletion(-) + +diff --git a/event-loop-base.c b/event-loop-base.c +index a924c73a7c..e7f99a6ec8 100644 +--- a/event-loop-base.c ++++ b/event-loop-base.c +@@ -73,10 +73,23 @@ static void event_loop_base_complete(UserCreatable *uc, Error **errp) + } + } + ++static bool event_loop_base_can_be_deleted(UserCreatable *uc) ++{ ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc); ++ EventLoopBase *backend = EVENT_LOOP_BASE(uc); ++ ++ if (bc->can_be_deleted) { ++ return bc->can_be_deleted(backend); ++ } ++ ++ return true; ++} ++ + static void event_loop_base_class_init(ObjectClass *klass, void *class_data) + { + UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); + ucc->complete = event_loop_base_complete; ++ ucc->can_be_deleted = event_loop_base_can_be_deleted; + + object_class_property_add(klass, "aio-max-batch", "int", + event_loop_base_get_param, +diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h +index d3750c8e76..20c9387654 100644 +--- a/include/qemu/main-loop.h ++++ b/include/qemu/main-loop.h +@@ -26,9 +26,19 @@ + #define QEMU_MAIN_LOOP_H + + #include "block/aio.h" ++#include "qom/object.h" ++#include "sysemu/event-loop-base.h" + + #define SIG_IPI SIGUSR1 + ++#define TYPE_MAIN_LOOP "main-loop" ++OBJECT_DECLARE_TYPE(MainLoop, MainLoopClass, MAIN_LOOP) ++ 
++struct MainLoop { ++ EventLoopBase parent_obj; ++}; ++typedef struct MainLoop MainLoop; ++ + /** + * qemu_init_main_loop: Set up the process so that it can run the main loop. + * +diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h +index 8e77d8b69f..fced4c9fea 100644 +--- a/include/sysemu/event-loop-base.h ++++ b/include/sysemu/event-loop-base.h +@@ -25,6 +25,7 @@ struct EventLoopBaseClass { + + void (*init)(EventLoopBase *base, Error **errp); + void (*update_params)(EventLoopBase *base, Error **errp); ++ bool (*can_be_deleted)(EventLoopBase *base); + }; + + struct EventLoopBase { +diff --git a/meson.build b/meson.build +index b9c919a55e..5a7c10e639 100644 +--- a/meson.build ++++ b/meson.build +@@ -2832,7 +2832,8 @@ libqemuutil = static_library('qemuutil', + sources: util_ss.sources() + stub_ss.sources() + genh, + dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc, pixman]) + qemuutil = declare_dependency(link_with: libqemuutil, +- sources: genh + version_res) ++ sources: genh + version_res, ++ dependencies: [event_loop_base]) + + if have_system or have_user + decodetree = generator(find_program('scripts/decodetree.py'), +diff --git a/qapi/qom.json b/qapi/qom.json +index a2439533c5..7d4a2ac1b9 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -540,6 +540,17 @@ + '*poll-grow': 'int', + '*poll-shrink': 'int' } } + ++## ++# @MainLoopProperties: ++# ++# Properties for the main-loop object. 
++# ++# Since: 7.1 ++## ++{ 'struct': 'MainLoopProperties', ++ 'base': 'EventLoopBaseProperties', ++ 'data': {} } ++ + ## + # @MemoryBackendProperties: + # +@@ -830,6 +841,7 @@ + { 'name': 'input-linux', + 'if': 'CONFIG_LINUX' }, + 'iothread', ++ 'main-loop', + { 'name': 'memory-backend-epc', + 'if': 'CONFIG_LINUX' }, + 'memory-backend-file', +@@ -895,6 +907,7 @@ + 'input-linux': { 'type': 'InputLinuxProperties', + 'if': 'CONFIG_LINUX' }, + 'iothread': 'IothreadProperties', ++ 'main-loop': 'MainLoopProperties', + 'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties', + 'if': 'CONFIG_LINUX' }, + 'memory-backend-file': 'MemoryBackendFileProperties', +diff --git a/util/main-loop.c b/util/main-loop.c +index b7b0ce4ca0..5b13f456fa 100644 +--- a/util/main-loop.c ++++ b/util/main-loop.c +@@ -33,6 +33,7 @@ + #include "qemu/error-report.h" + #include "qemu/queue.h" + #include "qemu/compiler.h" ++#include "qom/object.h" + + #ifndef _WIN32 + #include +@@ -184,6 +185,61 @@ int qemu_init_main_loop(Error **errp) + return 0; + } + ++static void main_loop_update_params(EventLoopBase *base, Error **errp) ++{ ++ if (!qemu_aio_context) { ++ error_setg(errp, "qemu aio context not ready"); ++ return; ++ } ++ ++ aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp); ++} ++ ++MainLoop *mloop; ++ ++static void main_loop_init(EventLoopBase *base, Error **errp) ++{ ++ MainLoop *m = MAIN_LOOP(base); ++ ++ if (mloop) { ++ error_setg(errp, "only one main-loop instance allowed"); ++ return; ++ } ++ ++ main_loop_update_params(base, errp); ++ ++ mloop = m; ++ return; ++} ++ ++static bool main_loop_can_be_deleted(EventLoopBase *base) ++{ ++ return false; ++} ++ ++static void main_loop_class_init(ObjectClass *oc, void *class_data) ++{ ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc); ++ ++ bc->init = main_loop_init; ++ bc->update_params = main_loop_update_params; ++ bc->can_be_deleted = main_loop_can_be_deleted; ++} ++ ++static const TypeInfo main_loop_info = { ++ 
.name = TYPE_MAIN_LOOP, ++ .parent = TYPE_EVENT_LOOP_BASE, ++ .class_init = main_loop_class_init, ++ .instance_size = sizeof(MainLoop), ++}; ++ ++static void main_loop_register_types(void) ++{ ++ type_register_static(&main_loop_info); ++} ++ ++type_init(main_loop_register_types) ++ + static int max_priority; + + #ifndef _WIN32 +-- +2.31.1 + diff --git a/kvm-vdpa-Add-device-migration-blocker.patch b/kvm-vdpa-Add-device-migration-blocker.patch new file mode 100644 index 0000000..1b83c98 --- /dev/null +++ b/kvm-vdpa-Add-device-migration-blocker.patch @@ -0,0 +1,106 @@ +From 8e0fdce814af4cfc84dce5e5920da989b1f1a86d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:06:05 +0200 +Subject: [PATCH 26/32] vdpa: Add device migration blocker +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [26/27] 53d94d45b5e5e88f12b95f9b0f243696cfcbd7ce (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit c156d5bf2b142dcc06808ccee06882144f230aec +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:45 2022 +0200 + + vdpa: Add device migration blocker + + Since the vhost-vdpa device is exposing _F_LOG, adding a migration blocker if + it uses CVQ. + + However, qemu is able to migrate simple devices with no CVQ as long as + they use SVQ. To allow it, add a placeholder error to vhost_vdpa, and + only add to vhost_dev when used. vhost_dev machinery place the migration + blocker if needed. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-vdpa.c | 15 +++++++++++++++ + include/hw/virtio/vhost-vdpa.h | 1 + + 2 files changed, 16 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 49effe5462..e3e5bce4bb 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -20,6 +20,7 @@ + #include "hw/virtio/vhost-shadow-virtqueue.h" + #include "hw/virtio/vhost-vdpa.h" + #include "exec/address-spaces.h" ++#include "migration/blocker.h" + #include "qemu/main-loop.h" + #include "cpu.h" + #include "trace.h" +@@ -1020,6 +1021,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) + return true; + } + ++ if (v->migration_blocker) { ++ int r = migrate_add_blocker(v->migration_blocker, &err); ++ if (unlikely(r < 0)) { ++ return false; ++ } ++ } ++ + for (i = 0; i < v->shadow_vqs->len; ++i) { + VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); +@@ -1062,6 +1070,10 @@ err: + vhost_svq_stop(svq); + } + ++ if (v->migration_blocker) { ++ migrate_del_blocker(v->migration_blocker); ++ } ++ + return false; + } + +@@ -1081,6 +1093,9 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev) + } + } + ++ if (v->migration_blocker) { ++ migrate_del_blocker(v->migration_blocker); ++ } + return true; + } + +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index 1111d85643..d10a89303e 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -35,6 +35,7 @@ typedef struct vhost_vdpa { + bool shadow_vqs_enabled; + /* IOVA mapping used by the Shadow Virtqueue */ + VhostIOVATree *iova_tree; ++ Error *migration_blocker; + GPtrArray *shadow_vqs; + const VhostShadowVirtqueueOps *shadow_vq_ops; + void *shadow_vq_ops_opaque; +-- +2.31.1 + diff --git a/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch b/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch new file mode 
100644 index 0000000..8a7b600 --- /dev/null +++ b/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch @@ -0,0 +1,223 @@ +From 0b27781f9984c67625c49a516c3e38fbf5fa1b1b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:06:16 +0200 +Subject: [PATCH 27/32] vdpa: Add x-svq to NetdevVhostVDPAOptions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [27/27] bd85496c2a8c1ebf34f908fca2be2ab9852fd0e9 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 1576dbb5bbc49344c606e969ec749be70c0fd94e +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:46 2022 +0200 + + vdpa: Add x-svq to NetdevVhostVDPAOptions + + Finally offering the possibility to enable SVQ from the command line. + + Signed-off-by: Eugenio Pérez + Acked-by: Markus Armbruster + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + net/vhost-vdpa.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++-- + qapi/net.json | 9 +++++- + 2 files changed, 77 insertions(+), 4 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 8b76dac966..50672bcd66 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -75,6 +75,28 @@ const int vdpa_feature_bits[] = { + VHOST_INVALID_FEATURE_BIT + }; + ++/** Supported device specific feature bits with SVQ */ ++static const uint64_t vdpa_svq_device_features = ++ BIT_ULL(VIRTIO_NET_F_CSUM) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | ++ BIT_ULL(VIRTIO_NET_F_MTU) | ++ BIT_ULL(VIRTIO_NET_F_MAC) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | ++ BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | ++ BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | ++ BIT_ULL(VIRTIO_NET_F_HOST_ECN) | ++ BIT_ULL(VIRTIO_NET_F_HOST_UFO) | ++ BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | ++ BIT_ULL(VIRTIO_NET_F_STATUS) | ++ BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | ++ BIT_ULL(VIRTIO_F_ANY_LAYOUT) | ++ BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | ++ BIT_ULL(VIRTIO_NET_F_RSC_EXT) | ++ BIT_ULL(VIRTIO_NET_F_STANDBY); ++ + VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); +@@ -133,9 +155,13 @@ err_init: + static void vhost_vdpa_cleanup(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); ++ struct vhost_dev *dev = &s->vhost_net->dev; + + qemu_vfree(s->cvq_cmd_out_buffer); + qemu_vfree(s->cvq_cmd_in_buffer); ++ if (dev->vq_index + dev->nvqs == dev->vq_index_end) { ++ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); ++ } + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); +@@ -437,7 +463,9 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + int vdpa_device_fd, + int queue_pair_index, + int nvqs, +- bool 
is_datapath) ++ bool is_datapath, ++ bool svq, ++ VhostIOVATree *iova_tree) + { + NetClientState *nc = NULL; + VhostVDPAState *s; +@@ -455,6 +483,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.device_fd = vdpa_device_fd; + s->vhost_vdpa.index = queue_pair_index; ++ s->vhost_vdpa.shadow_vqs_enabled = svq; ++ s->vhost_vdpa.iova_tree = iova_tree; + if (!is_datapath) { + s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size, + vhost_vdpa_net_cvq_cmd_page_len()); +@@ -465,6 +495,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; ++ error_setg(&s->vhost_vdpa.migration_blocker, ++ "Migration disabled: vhost-vdpa uses CVQ."); + } + ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); + if (ret) { +@@ -474,6 +506,14 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + return nc; + } + ++static int vhost_vdpa_get_iova_range(int fd, ++ struct vhost_vdpa_iova_range *iova_range) ++{ ++ int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range); ++ ++ return ret < 0 ? 
-errno : 0; ++} ++ + static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp) + { + int ret = ioctl(fd, VHOST_GET_FEATURES, features); +@@ -524,6 +564,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + uint64_t features; + int vdpa_device_fd; + g_autofree NetClientState **ncs = NULL; ++ g_autoptr(VhostIOVATree) iova_tree = NULL; + NetClientState *nc; + int queue_pairs, r, i, has_cvq = 0; + +@@ -551,22 +592,45 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + return queue_pairs; + } + ++ if (opts->x_svq) { ++ struct vhost_vdpa_iova_range iova_range; ++ ++ uint64_t invalid_dev_features = ++ features & ~vdpa_svq_device_features & ++ /* Transport are all accepted at this point */ ++ ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, ++ VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); ++ ++ if (invalid_dev_features) { ++ error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, ++ invalid_dev_features); ++ goto err_svq; ++ } ++ ++ vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); ++ iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); ++ } ++ + ncs = g_malloc0(sizeof(*ncs) * queue_pairs); + + for (i = 0; i < queue_pairs; i++) { + ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, +- vdpa_device_fd, i, 2, true); ++ vdpa_device_fd, i, 2, true, opts->x_svq, ++ iova_tree); + if (!ncs[i]) + goto err; + } + + if (has_cvq) { + nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, +- vdpa_device_fd, i, 1, false); ++ vdpa_device_fd, i, 1, false, ++ opts->x_svq, iova_tree); + if (!nc) + goto err; + } + ++ /* iova_tree ownership belongs to last NetClientState */ ++ g_steal_pointer(&iova_tree); + return 0; + + err: +@@ -575,6 +639,8 @@ err: + qemu_del_net_client(ncs[i]); + } + } ++ ++err_svq: + qemu_close(vdpa_device_fd); + + return -1; +diff --git a/qapi/net.json b/qapi/net.json +index b92f3f5fb4..92848e4362 100644 +--- a/qapi/net.json ++++ b/qapi/net.json +@@ -445,12 +445,19 @@ + # 
@queues: number of queues to be created for multiqueue vhost-vdpa + # (default: 1) + # ++# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1) ++# (default: false) ++# ++# Features: ++# @unstable: Member @x-svq is experimental. ++# + # Since: 5.1 + ## + { 'struct': 'NetdevVhostVDPAOptions', + 'data': { + '*vhostdev': 'str', +- '*queues': 'int' } } ++ '*queues': 'int', ++ '*x-svq': {'type': 'bool', 'features' : [ 'unstable'] } } } + + ## + # @NetClientDriver: +-- +2.31.1 + diff --git a/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch b/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch new file mode 100644 index 0000000..acd45e0 --- /dev/null +++ b/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch @@ -0,0 +1,65 @@ +From df06ce560ddfefde98bef822ec2020382059921f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 10/32] vdpa: Avoid compiler to squash reads to used idx +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [10/27] b28789302d4f64749da26f413763f918161d9b70 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit c381abc37f0aba42ed2e3b41cdace8f8438829e4 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:29 2022 +0200 + + vdpa: Avoid compiler to squash reads to used idx + + In the next patch we will allow busypolling of this value. The compiler + have a running path where shadow_used_idx, last_used_idx, and vring used + idx are not modified within the same thread busypolling. + + This was not an issue before since we always cleared device event + notifier before checking it, and that could act as memory barrier. 
+ However, the busypoll needs something similar to kernel READ_ONCE. + + Let's add it here, sepparated from the polling. + + Signed-off-by: Eugenio Pérez + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 3fbda1e3d4..9c46c3a8fa 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -327,11 +327,12 @@ static void vhost_handle_guest_kick_notifier(EventNotifier *n) + + static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) + { ++ uint16_t *used_idx = &svq->vring.used->idx; + if (svq->last_used_idx != svq->shadow_used_idx) { + return true; + } + +- svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx); ++ svq->shadow_used_idx = cpu_to_le16(*(volatile uint16_t *)used_idx); + + return svq->last_used_idx != svq->shadow_used_idx; + } +-- +2.31.1 + diff --git a/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch b/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch new file mode 100644 index 0000000..243aec8 --- /dev/null +++ b/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch @@ -0,0 +1,323 @@ +From 881945094c0e4d33614d40959bfc20e395f5a478 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:05:40 +0200 +Subject: [PATCH 24/32] vdpa: Buffer CVQ support on shadow virtqueue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [24/27] 5486f80141a3ad968a32e782bdcdead32f417352 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 2df4dd31e194c94da7d28c02e92449f4a989fca9 
+Author: Eugenio Pérez +Date: Wed Jul 20 08:59:43 2022 +0200 + + vdpa: Buffer CVQ support on shadow virtqueue + + Introduce the control virtqueue support for vDPA shadow virtqueue. This + is needed for advanced networking features like rx filtering. + + Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid + TOCTOU with the guest's or device's memory every time there is a device + model change. Otherwise, the guest could change the memory content in + the time between qemu and the device read it. + + To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is + implemented. If the virtio-net driver changes MAC the virtio-net device + model will be updated with the new one, and a rx filtering change event + will be raised. + + More cvq commands could be added here straightforwardly but they have + not been tested. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + net/vhost-vdpa.c | 213 +++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 205 insertions(+), 8 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 2e3b6b10d8..df42822463 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -33,6 +33,9 @@ typedef struct VhostVDPAState { + NetClientState nc; + struct vhost_vdpa vhost_vdpa; + VHostNetState *vhost_net; ++ ++ /* Control commands shadow buffers */ ++ void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer; + bool started; + } VhostVDPAState; + +@@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + ++ qemu_vfree(s->cvq_cmd_out_buffer); ++ qemu_vfree(s->cvq_cmd_in_buffer); + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); +@@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = { + .check_peer_type = vhost_vdpa_check_peer_type, + }; + ++static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) ++{ ++ 
VhostIOVATree *tree = v->iova_tree; ++ DMAMap needle = { ++ /* ++ * No need to specify size or to look for more translations since ++ * this contiguous chunk was allocated by us. ++ */ ++ .translated_addr = (hwaddr)(uintptr_t)addr, ++ }; ++ const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle); ++ int r; ++ ++ if (unlikely(!map)) { ++ error_report("Cannot locate expected map"); ++ return; ++ } ++ ++ r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1); ++ if (unlikely(r != 0)) { ++ error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); ++ } ++ ++ vhost_iova_tree_remove(tree, map); ++} ++ ++static size_t vhost_vdpa_net_cvq_cmd_len(void) ++{ ++ /* ++ * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. ++ * In buffer is always 1 byte, so it should fit here ++ */ ++ return sizeof(struct virtio_net_ctrl_hdr) + ++ 2 * sizeof(struct virtio_net_ctrl_mac) + ++ MAC_TABLE_ENTRIES * ETH_ALEN; ++} ++ ++static size_t vhost_vdpa_net_cvq_cmd_page_len(void) ++{ ++ return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size); ++} ++ ++/** Copy and map a guest buffer. */ ++static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, ++ const struct iovec *out_data, ++ size_t out_num, size_t data_len, void *buf, ++ size_t *written, bool write) ++{ ++ DMAMap map = {}; ++ int r; ++ ++ if (unlikely(!data_len)) { ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n", ++ __func__, write ? "in" : "out"); ++ return false; ++ } ++ ++ *written = iov_to_buf(out_data, out_num, 0, buf, data_len); ++ map.translated_addr = (hwaddr)(uintptr_t)buf; ++ map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1; ++ map.perm = write ? 
IOMMU_RW : IOMMU_RO, ++ r = vhost_iova_tree_map_alloc(v->iova_tree, &map); ++ if (unlikely(r != IOVA_OK)) { ++ error_report("Cannot map injected element"); ++ return false; ++ } ++ ++ r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, ++ !write); ++ if (unlikely(r < 0)) { ++ goto dma_map_err; ++ } ++ ++ return true; ++ ++dma_map_err: ++ vhost_iova_tree_remove(v->iova_tree, &map); ++ return false; ++} ++ + /** +- * Forward buffer for the moment. ++ * Copy the guest element into a dedicated buffer suitable to be sent to NIC ++ * ++ * @iov: [0] is the out buffer, [1] is the in one ++ */ ++static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, ++ VirtQueueElement *elem, ++ struct iovec *iov) ++{ ++ size_t in_copied; ++ bool ok; ++ ++ iov[0].iov_base = s->cvq_cmd_out_buffer; ++ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num, ++ vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base, ++ &iov[0].iov_len, false); ++ if (unlikely(!ok)) { ++ return false; ++ } ++ ++ iov[1].iov_base = s->cvq_cmd_in_buffer; ++ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0, ++ sizeof(virtio_net_ctrl_ack), iov[1].iov_base, ++ &in_copied, true); ++ if (unlikely(!ok)) { ++ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); ++ return false; ++ } ++ ++ iov[1].iov_len = sizeof(virtio_net_ctrl_ack); ++ return true; ++} ++ ++/** ++ * Do not forward commands not supported by SVQ. Otherwise, the device could ++ * accept it and qemu would not know how to update the device model. 
++ */ ++static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out, ++ size_t out_num) ++{ ++ struct virtio_net_ctrl_hdr ctrl; ++ size_t n; ++ ++ n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl)); ++ if (unlikely(n < sizeof(ctrl))) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "%s: invalid legnth of out buffer %zu\n", __func__, n); ++ return false; ++ } ++ ++ switch (ctrl.class) { ++ case VIRTIO_NET_CTRL_MAC: ++ switch (ctrl.cmd) { ++ case VIRTIO_NET_CTRL_MAC_ADDR_SET: ++ return true; ++ default: ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n", ++ __func__, ctrl.cmd); ++ }; ++ break; ++ default: ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n", ++ __func__, ctrl.class); ++ }; ++ ++ return false; ++} ++ ++/** ++ * Validate and copy control virtqueue commands. ++ * ++ * Following QEMU guidelines, we offer a copy of the buffers to the device to ++ * prevent TOCTOU bugs. + */ + static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + VirtQueueElement *elem, + void *opaque) + { +- unsigned int n = elem->out_num + elem->in_num; +- g_autofree struct iovec *dev_buffers = g_new(struct iovec, n); ++ VhostVDPAState *s = opaque; + size_t in_len, dev_written; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; +- int r; ++ /* out and in buffers sent to the device */ ++ struct iovec dev_buffers[2] = { ++ { .iov_base = s->cvq_cmd_out_buffer }, ++ { .iov_base = s->cvq_cmd_in_buffer }, ++ }; ++ /* in buffer used for device model */ ++ const struct iovec in = { ++ .iov_base = &status, ++ .iov_len = sizeof(status), ++ }; ++ int r = -EINVAL; ++ bool ok; ++ ++ ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers); ++ if (unlikely(!ok)) { ++ goto out; ++ } + +- memcpy(dev_buffers, elem->out_sg, elem->out_num); +- memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num); ++ ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1); ++ if (unlikely(!ok)) { ++ goto out; ++ } + +- r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, 
&dev_buffers[1], +- elem->in_num, elem); ++ r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem); + if (unlikely(r != 0)) { + if (unlikely(r == -ENOSPC)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", +@@ -224,6 +396,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + dev_written = vhost_svq_poll(svq); + if (unlikely(dev_written < sizeof(status))) { + error_report("Insufficient written data (%zu)", dev_written); ++ goto out; ++ } ++ ++ memcpy(&status, dev_buffers[1].iov_base, sizeof(status)); ++ if (status != VIRTIO_NET_OK) { ++ goto out; ++ } ++ ++ status = VIRTIO_NET_ERR; ++ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1); ++ if (status != VIRTIO_NET_OK) { ++ error_report("Bad CVQ processing in model"); + } + + out: +@@ -234,6 +418,12 @@ out: + } + vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); + g_free(elem); ++ if (dev_buffers[0].iov_base) { ++ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base); ++ } ++ if (dev_buffers[1].iov_base) { ++ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base); ++ } + return r; + } + +@@ -266,6 +456,13 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + s->vhost_vdpa.device_fd = vdpa_device_fd; + s->vhost_vdpa.index = queue_pair_index; + if (!is_datapath) { ++ s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size, ++ vhost_vdpa_net_cvq_cmd_page_len()); ++ memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); ++ s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size, ++ vhost_vdpa_net_cvq_cmd_page_len()); ++ memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); ++ + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; + } +-- +2.31.1 + diff --git a/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch b/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch new file mode 100644 index 0000000..d6e72ac --- 
/dev/null +++ b/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch @@ -0,0 +1,84 @@ +From 3a5d325fcb2958318262efac31d5fd25fb062523 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 21/32] vdpa: Export vhost_vdpa_dma_map and unmap calls +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [21/27] 97e7a583bbd3c12a0786d53132812ec41702c190 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 463ba1e3b8cf080812895c5f26d95d8d7db2e692 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:40 2022 +0200 + + vdpa: Export vhost_vdpa_dma_map and unmap calls + + Shadow CVQ will copy buffers on qemu VA, so we avoid TOCTOU attacks from + the guest that could set a different state in qemu device model and vdpa + device. + + To do so, it needs to be able to map these new buffers to the device. + + Signed-off-by: Eugenio Pérez + Acked-by: Jason Wang + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-vdpa.c | 7 +++---- + include/hw/virtio/vhost-vdpa.h | 4 ++++ + 2 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 28df57b12e..14b02fe079 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -71,8 +71,8 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, + return false; + } + +-static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, +- void *vaddr, bool readonly) ++int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, ++ void *vaddr, bool readonly) + { + struct vhost_msg_v2 msg = {}; + int fd = v->device_fd; +@@ -97,8 +97,7 @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, + return ret; + } + +-static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, +- hwaddr size) ++int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size) + { + struct vhost_msg_v2 msg = {}; + int fd = v->device_fd; +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index a29dbb3f53..7214eb47dc 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -39,4 +39,8 @@ typedef struct vhost_vdpa { + VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; + } VhostVDPA; + ++int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, ++ void *vaddr, bool readonly); ++int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size); ++ + #endif +-- +2.31.1 + diff --git a/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch b/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch new file mode 100644 index 0000000..44e97af --- /dev/null +++ b/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch @@ -0,0 +1,108 @@ +From 9a290bd74f983f3a65aa9ec5df2da9aa94bfdecd Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:05:42 +0200 +Subject: [PATCH 25/32] vdpa: Extract get features part from + vhost_vdpa_get_max_queue_pairs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [25/27] 654ad68e10a4df84cced923c64e72d500721ad67 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 8170ab3f43989680491d00f1017f60b25d346114 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:44 2022 +0200 + + vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs + + To know the device features is needed for CVQ SVQ, so SVQ knows if it + can handle all commands or not. Extract from + vhost_vdpa_get_max_queue_pairs so we can reuse it. + + Signed-off-by: Eugenio Pérez + Acked-by: Jason Wang + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + net/vhost-vdpa.c | 30 ++++++++++++++++++++---------- + 1 file changed, 20 insertions(+), 10 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index df42822463..8b76dac966 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -474,20 +474,24 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + return nc; + } + +-static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp) ++static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp) ++{ ++ int ret = ioctl(fd, VHOST_GET_FEATURES, features); ++ if (unlikely(ret < 0)) { ++ error_setg_errno(errp, errno, ++ "Fail to query features from vhost-vDPA device"); ++ } ++ return ret; ++} ++ ++static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features, ++ int *has_cvq, Error **errp) + { + unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); + g_autofree struct vhost_vdpa_config *config = NULL; + __virtio16 *max_queue_pairs; +- uint64_t features; + int ret; + +- ret = ioctl(fd, VHOST_GET_FEATURES, &features); +- if (ret) { +- error_setg(errp, "Fail to query features from vhost-vDPA device"); +- return ret; +- } +- + if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) { + *has_cvq = 1; + } else { +@@ -517,10 +521,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp) + { + const NetdevVhostVDPAOptions *opts; ++ uint64_t features; + int vdpa_device_fd; + g_autofree NetClientState **ncs = NULL; + NetClientState *nc; +- int queue_pairs, i, has_cvq = 0; ++ int queue_pairs, r, i, has_cvq = 0; + + assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); + opts = &netdev->u.vhost_vdpa; +@@ -534,7 +539,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + return -errno; + } + +- queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, ++ r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp); ++ if 
(unlikely(r < 0)) { ++ return r; ++ } ++ ++ queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features, + &has_cvq, errp); + if (queue_pairs < 0) { + qemu_close(vdpa_device_fd); +-- +2.31.1 + diff --git a/kvm-vdpa-manual-forward-CVQ-buffers.patch b/kvm-vdpa-manual-forward-CVQ-buffers.patch new file mode 100644 index 0000000..61909ff --- /dev/null +++ b/kvm-vdpa-manual-forward-CVQ-buffers.patch @@ -0,0 +1,166 @@ +From c33bc0b7f2b5cfa330a6d89d60ee94de129c65c1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:05:38 +0200 +Subject: [PATCH 23/32] vdpa: manual forward CVQ buffers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [23/27] ce128d5152be7eebf87e186eb8b58c2ed95aff6d (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit bd907ae4b00ebedad5e586af05ea3d6490318d45 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:42 2022 +0200 + + vdpa: manual forward CVQ buffers + + Do a simple forwarding of CVQ buffers, the same work SVQ could do but + through callbacks. No functional change intended. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-vdpa.c | 3 +- + include/hw/virtio/vhost-vdpa.h | 3 ++ + net/vhost-vdpa.c | 58 ++++++++++++++++++++++++++++++++++ + 3 files changed, 63 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 14b02fe079..49effe5462 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -417,7 +417,8 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, + for (unsigned n = 0; n < hdev->nvqs; ++n) { + g_autoptr(VhostShadowVirtqueue) svq; + +- svq = vhost_svq_new(v->iova_tree, NULL, NULL); ++ svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, ++ v->shadow_vq_ops_opaque); + if (unlikely(!svq)) { + error_setg(errp, "Cannot create svq %u", n); + return -1; +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index 7214eb47dc..1111d85643 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -15,6 +15,7 @@ + #include + + #include "hw/virtio/vhost-iova-tree.h" ++#include "hw/virtio/vhost-shadow-virtqueue.h" + #include "hw/virtio/virtio.h" + #include "standard-headers/linux/vhost_types.h" + +@@ -35,6 +36,8 @@ typedef struct vhost_vdpa { + /* IOVA mapping used by the Shadow Virtqueue */ + VhostIOVATree *iova_tree; + GPtrArray *shadow_vqs; ++ const VhostShadowVirtqueueOps *shadow_vq_ops; ++ void *shadow_vq_ops_opaque; + struct vhost_dev *dev; + VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; + } VhostVDPA; +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index df1e69ee72..2e3b6b10d8 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -11,11 +11,14 @@ + + #include "qemu/osdep.h" + #include "clients.h" ++#include "hw/virtio/virtio-net.h" + #include "net/vhost_net.h" + #include "net/vhost-vdpa.h" + #include "hw/virtio/vhost-vdpa.h" + #include "qemu/config-file.h" + #include "qemu/error-report.h" ++#include "qemu/log.h" ++#include "qemu/memalign.h" + 
#include "qemu/option.h" + #include "qapi/error.h" + #include +@@ -187,6 +190,57 @@ static NetClientInfo net_vhost_vdpa_info = { + .check_peer_type = vhost_vdpa_check_peer_type, + }; + ++/** ++ * Forward buffer for the moment. ++ */ ++static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, ++ VirtQueueElement *elem, ++ void *opaque) ++{ ++ unsigned int n = elem->out_num + elem->in_num; ++ g_autofree struct iovec *dev_buffers = g_new(struct iovec, n); ++ size_t in_len, dev_written; ++ virtio_net_ctrl_ack status = VIRTIO_NET_ERR; ++ int r; ++ ++ memcpy(dev_buffers, elem->out_sg, elem->out_num); ++ memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num); ++ ++ r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1], ++ elem->in_num, elem); ++ if (unlikely(r != 0)) { ++ if (unlikely(r == -ENOSPC)) { ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", ++ __func__); ++ } ++ goto out; ++ } ++ ++ /* ++ * We can poll here since we've had BQL from the time we sent the ++ * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, ++ * when BQL is released ++ */ ++ dev_written = vhost_svq_poll(svq); ++ if (unlikely(dev_written < sizeof(status))) { ++ error_report("Insufficient written data (%zu)", dev_written); ++ } ++ ++out: ++ in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, ++ sizeof(status)); ++ if (unlikely(in_len < sizeof(status))) { ++ error_report("Bad device CVQ written length"); ++ } ++ vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); ++ g_free(elem); ++ return r; ++} ++ ++static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { ++ .avail_handler = vhost_vdpa_net_handle_ctrl_avail, ++}; ++ + static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + const char *device, + const char *name, +@@ -211,6 +265,10 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.device_fd = vdpa_device_fd; + s->vhost_vdpa.index = queue_pair_index; ++ if (!is_datapath) { ++ s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; ++ s->vhost_vdpa.shadow_vq_ops_opaque = s; ++ } + ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); + if (ret) { + qemu_del_net_client(nc); +-- +2.31.1 + diff --git a/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch b/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch new file mode 100644 index 0000000..26083c1 --- /dev/null +++ b/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch @@ -0,0 +1,114 @@ +From b90a5878355bd549200ed1eff52ea084325bfc8a Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 6 May 2022 15:25:10 +0200 +Subject: [PATCH 5/5] vfio/common: remove spurious tpm-crb-cmd misalignment + warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 84: vfio/common: Remove spurious tpm-crb-cmd misalignment warning +RH-Commit: [2/2] 9b73a9aec59cb50d5e3468cc553464bf4a73d0a1 
(eauger1/centos-qemu-kvm) +RH-Bugzilla: 2037612 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2037612 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45166961 +Upstream Status: YES +Tested: With TPM-CRB and VFIO + +The CRB command buffer currently is a RAM MemoryRegion and given +its base address alignment, it causes an error report on +vfio_listener_region_add(). This region could have been a RAM device +region, easing the detection of such safe situation but this option +was not well received. So let's add a helper function that uses the +memory region owner type to detect the situation is safe wrt +the assignment. Other device types can be checked here if such kind +of problem occurs again. + +Conflicts in hw/vfio/common.c +We don't have 8e3b0cbb721 ("Replace qemu_real_host_page variables with inlined functions") + +Signed-off-by: Eric Auger +Reviewed-by: Philippe Mathieu-Daudé +Acked-by: Stefan Berger +Reviewed-by: Cornelia Huck +Link: https://lore.kernel.org/r/20220506132510.1847942-3-eric.auger@redhat.com +Signed-off-by: Alex Williamson +(cherry picked from commit 851d6d1a0ff29a87ec588205842edf6b86d99b5c) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 27 ++++++++++++++++++++++++++- + hw/vfio/trace-events | 1 + + 2 files changed, 27 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 080046e3f5..0fbe0d47af 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -40,6 +40,7 @@ + #include "trace.h" + #include "qapi/error.h" + #include "migration/migration.h" ++#include "sysemu/tpm.h" + + VFIOGroupList vfio_group_list = + QLIST_HEAD_INITIALIZER(vfio_group_list); +@@ -861,6 +862,22 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container, + g_free(vrdl); + } + ++static bool vfio_known_safe_misalignment(MemoryRegionSection *section) ++{ ++ MemoryRegion *mr = section->mr; ++ ++ if 
(!TPM_IS_CRB(mr->owner)) { ++ return false; ++ } ++ ++ /* this is a known safe misaligned region, just trace for debug purpose */ ++ trace_vfio_known_safe_misalignment(memory_region_name(mr), ++ section->offset_within_address_space, ++ section->offset_within_region, ++ qemu_real_host_page_size); ++ return true; ++} ++ + static void vfio_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { +@@ -884,7 +901,15 @@ static void vfio_listener_region_add(MemoryListener *listener, + if (unlikely((section->offset_within_address_space & + ~qemu_real_host_page_mask) != + (section->offset_within_region & ~qemu_real_host_page_mask))) { +- error_report("%s received unaligned region", __func__); ++ if (!vfio_known_safe_misalignment(section)) { ++ error_report("%s received unaligned region %s iova=0x%"PRIx64 ++ " offset_within_region=0x%"PRIx64 ++ " qemu_real_host_page_size=0x%"PRIxPTR, ++ __func__, memory_region_name(section->mr), ++ section->offset_within_address_space, ++ section->offset_within_region, ++ qemu_real_host_page_size); ++ } + return; + } + +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 0ef1b5f4a6..582882db91 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -100,6 +100,7 @@ vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add + vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" + vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64 + vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" ++vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR ": cannot be mapped for DMA" + vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t 
size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" + vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 + vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 +-- +2.31.1 + diff --git a/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch b/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch new file mode 100644 index 0000000..7e644c5 --- /dev/null +++ b/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch @@ -0,0 +1,78 @@ +From 3de8fb9f3dba18d04efa10b70bcec641035effc5 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 24 May 2022 05:14:05 -0400 +Subject: [PATCH 16/16] vfio/common: remove spurious warning on + vfio_listener_region_del + +RH-Author: Eric Auger +RH-MergeRequest: 101: vfio/common: remove spurious warning on vfio_listener_region_del +RH-Commit: [1/1] dac688b8a981ebb964fea79ea198c329b9cdb551 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2086262 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: Alex Williamson + + Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2086262 + Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45876133 + Upstream Status: YES + Tested: With TPM-CRB and VFIO + +851d6d1a0f ("vfio/common: remove spurious tpm-crb-cmd misalignment +warning") removed the warning on vfio_listener_region_add() path. + +However the same warning also hits on region_del path. Let's remove +it and reword the dynamic trace as this can be called on both +map and unmap path. 
+ +Contextual Conflict in hw/vfio/common.c +We don't have 8e3b0cbb721 ("Replace qemu_real_host_page variables with inlined functions") + +Signed-off-by: Eric Auger +Reviewed-by: Cornelia Huck +Link: https://lore.kernel.org/r/20220524091405.416256-1-eric.auger@redhat.com +Fixes: 851d6d1a0ff2 ("vfio/common: remove spurious tpm-crb-cmd misalignment warning") +Signed-off-by: Alex Williamson +(cherry picked from commit ec6600be0dc16982181c7ad80d94c143c0807dd2) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 10 +++++++++- + hw/vfio/trace-events | 2 +- + 2 files changed, 10 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 0fbe0d47af..637981f9a1 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1145,7 +1145,15 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (unlikely((section->offset_within_address_space & + ~qemu_real_host_page_mask) != + (section->offset_within_region & ~qemu_real_host_page_mask))) { +- error_report("%s received unaligned region", __func__); ++ if (!vfio_known_safe_misalignment(section)) { ++ error_report("%s received unaligned region %s iova=0x%"PRIx64 ++ " offset_within_region=0x%"PRIx64 ++ " qemu_real_host_page_size=0x%"PRIxPTR, ++ __func__, memory_region_name(section->mr), ++ section->offset_within_address_space, ++ section->offset_within_region, ++ qemu_real_host_page_size); ++ } + return; + } + +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 582882db91..73dffe9e00 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -100,7 +100,7 @@ vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add + vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" + vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64 + vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" 
+-vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR ": cannot be mapped for DMA" ++vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR + vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" + vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 + vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 +-- +2.31.1 + diff --git a/kvm-vhost-Add-SVQDescState.patch b/kvm-vhost-Add-SVQDescState.patch new file mode 100644 index 0000000..b1ea4bb --- /dev/null +++ b/kvm-vhost-Add-SVQDescState.patch @@ -0,0 +1,135 @@ +From 14200f493243f73152ea4a4b97274f0ec4fb36fa Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 15/32] vhost: Add SVQDescState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [15/27] 2e2866f22e37cace8598ff44dfcdc07fcc915d6d (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 9e87868fcaf5785c8e1490c290505fa32305ff91 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:34 2022 +0200 + + vhost: Add SVQDescState + + This will allow SVQ to add context to the different queue elements. 
+ + This patch only store the actual element, no functional change intended. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++-------- + hw/virtio/vhost-shadow-virtqueue.h | 8 ++++++-- + 2 files changed, 14 insertions(+), 10 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 3cec03d709..a08e3d4025 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -256,7 +256,7 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, + return -EINVAL; + } + +- svq->ring_id_maps[qemu_head] = elem; ++ svq->desc_state[qemu_head].elem = elem; + vhost_svq_kick(svq); + return 0; + } +@@ -411,21 +411,21 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- if (unlikely(!svq->ring_id_maps[used_elem.id])) { ++ if (unlikely(!svq->desc_state[used_elem.id].elem)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Device %s says index %u is used, but it was not available", + svq->vdev->name, used_elem.id); + return NULL; + } + +- num = svq->ring_id_maps[used_elem.id]->in_num + +- svq->ring_id_maps[used_elem.id]->out_num; ++ num = svq->desc_state[used_elem.id].elem->in_num + ++ svq->desc_state[used_elem.id].elem->out_num; + last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); + svq->desc_next[last_used_chain] = svq->free_head; + svq->free_head = used_elem.id; + + *len = used_elem.len; +- return g_steal_pointer(&svq->ring_id_maps[used_elem.id]); ++ return g_steal_pointer(&svq->desc_state[used_elem.id].elem); + } + + static void vhost_svq_flush(VhostShadowVirtqueue *svq, +@@ -595,7 +595,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + memset(svq->vring.desc, 0, driver_size); + svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); + 
memset(svq->vring.used, 0, device_size); +- svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); ++ svq->desc_state = g_new0(SVQDescState, svq->vring.num); + svq->desc_next = g_new0(uint16_t, svq->vring.num); + for (unsigned i = 0; i < svq->vring.num - 1; i++) { + svq->desc_next[i] = cpu_to_le16(i + 1); +@@ -620,7 +620,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + + for (unsigned i = 0; i < svq->vring.num; ++i) { + g_autofree VirtQueueElement *elem = NULL; +- elem = g_steal_pointer(&svq->ring_id_maps[i]); ++ elem = g_steal_pointer(&svq->desc_state[i].elem); + if (elem) { + virtqueue_detach_element(svq->vq, elem, 0); + } +@@ -632,7 +632,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + } + svq->vq = NULL; + g_free(svq->desc_next); +- g_free(svq->ring_id_maps); ++ g_free(svq->desc_state); + qemu_vfree(svq->vring.desc); + qemu_vfree(svq->vring.used); + } +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index c132c994e9..d646c35054 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -15,6 +15,10 @@ + #include "standard-headers/linux/vhost_types.h" + #include "hw/virtio/vhost-iova-tree.h" + ++typedef struct SVQDescState { ++ VirtQueueElement *elem; ++} SVQDescState; ++ + /* Shadow virtqueue to relay notifications */ + typedef struct VhostShadowVirtqueue { + /* Shadow vring */ +@@ -47,8 +51,8 @@ typedef struct VhostShadowVirtqueue { + /* IOVA mapping */ + VhostIOVATree *iova_tree; + +- /* Map for use the guest's descriptors */ +- VirtQueueElement **ring_id_maps; ++ /* SVQ vring descriptors state */ ++ SVQDescState *desc_state; + + /* Next VirtQueue element that guest made available */ + VirtQueueElement *next_guest_avail_elem; +-- +2.31.1 + diff --git a/kvm-vhost-Add-svq-avail_handler-callback.patch b/kvm-vhost-Add-svq-avail_handler-callback.patch new file mode 100644 index 0000000..a8b585d --- /dev/null +++ b/kvm-vhost-Add-svq-avail_handler-callback.patch @@ -0,0 
+1,164 @@ +From 433106c286a1961737300ebaece6f10b2747e7d8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 20/32] vhost: Add svq avail_handler callback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [20/27] d228eb89d204f8be623bc870503bbf0078dfc9ae (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit e966c0b781aebabd2c0f5eef91678f08ce1d068c +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:39 2022 +0200 + + vhost: Add svq avail_handler callback + + This allows external handlers to be aware of new buffers that the guest + places in the virtqueue. + + When this callback is defined the ownership of the guest's virtqueue + element is transferred to the callback. This means that if the user + wants to forward the descriptor it needs to manually inject it. The + callback is also free to process the command by itself and use the + element with svq_push. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 14 ++++++++++++-- + hw/virtio/vhost-shadow-virtqueue.h | 31 +++++++++++++++++++++++++++++- + hw/virtio/vhost-vdpa.c | 3 ++- + 3 files changed, 44 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 95d0d7a7ee..e53aac45f6 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -306,7 +306,11 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + break; + } + +- r = vhost_svq_add_element(svq, elem); ++ if (svq->ops) { ++ r = svq->ops->avail_handler(svq, elem, svq->ops_opaque); ++ } else { ++ r = vhost_svq_add_element(svq, elem); ++ } + if (unlikely(r != 0)) { + if (r == -ENOSPC) { + /* +@@ -685,12 +689,16 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + * shadow methods and file descriptors. + * + * @iova_tree: Tree to perform descriptors translations ++ * @ops: SVQ owner callbacks ++ * @ops_opaque: ops opaque pointer + * + * Returns the new virtqueue or NULL. + * + * In case of error, reason is reported through error_report. 
+ */ +-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) ++VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, ++ const VhostShadowVirtqueueOps *ops, ++ void *ops_opaque) + { + g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); + int r; +@@ -712,6 +720,8 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) + event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); + event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); + svq->iova_tree = iova_tree; ++ svq->ops = ops; ++ svq->ops_opaque = ops_opaque; + return g_steal_pointer(&svq); + + err_init_hdev_call: +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index cf442f7dea..d04c34a589 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -25,6 +25,27 @@ typedef struct SVQDescState { + unsigned int ndescs; + } SVQDescState; + ++typedef struct VhostShadowVirtqueue VhostShadowVirtqueue; ++ ++/** ++ * Callback to handle an avail buffer. ++ * ++ * @svq: Shadow virtqueue ++ * @elem: Element placed in the queue by the guest ++ * @vq_callback_opaque: Opaque ++ * ++ * Returns 0 if the vq is running as expected. ++ * ++ * Note that ownership of elem is transferred to the callback. 
++ */ ++typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq, ++ VirtQueueElement *elem, ++ void *vq_callback_opaque); ++ ++typedef struct VhostShadowVirtqueueOps { ++ VirtQueueAvailCallback avail_handler; ++} VhostShadowVirtqueueOps; ++ + /* Shadow virtqueue to relay notifications */ + typedef struct VhostShadowVirtqueue { + /* Shadow vring */ +@@ -69,6 +90,12 @@ typedef struct VhostShadowVirtqueue { + */ + uint16_t *desc_next; + ++ /* Caller callbacks */ ++ const VhostShadowVirtqueueOps *ops; ++ ++ /* Caller callbacks opaque */ ++ void *ops_opaque; ++ + /* Next head to expose to the device */ + uint16_t shadow_avail_idx; + +@@ -102,7 +129,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + VirtQueue *vq); + void vhost_svq_stop(VhostShadowVirtqueue *svq); + +-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree); ++VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, ++ const VhostShadowVirtqueueOps *ops, ++ void *ops_opaque); + + void vhost_svq_free(gpointer vq); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free); +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 33dcaa135e..28df57b12e 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -416,8 +416,9 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, + + shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); + for (unsigned n = 0; n < hdev->nvqs; ++n) { +- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree); ++ g_autoptr(VhostShadowVirtqueue) svq; + ++ svq = vhost_svq_new(v->iova_tree, NULL, NULL); + if (unlikely(!svq)) { + error_setg(errp, "Cannot create svq %u", n); + return -1; +-- +2.31.1 + diff --git a/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch b/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch new file mode 100644 index 0000000..9b09d42 --- /dev/null +++ b/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch @@ -0,0 +1,134 @@ +From 
893dffb820973361bcef33612a6b924554a856c1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 13/32] vhost: Check for queue full at vhost_svq_add +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [13/27] d4bd8299fb7733a1e190618dfc92b4b53b7bbeb3 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit f20b70eb5a68cfd8fef74a13ccdd494ef1cb0221 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:32 2022 +0200 + + vhost: Check for queue full at vhost_svq_add + + The series need to expose vhost_svq_add with full functionality, + including checking for full queue. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 59 +++++++++++++++++------------- + 1 file changed, 33 insertions(+), 26 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index e3fc3c2658..1d2bab287b 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -233,21 +233,29 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) + * Add an element to a SVQ. + * + * The caller must check that there is enough slots for the new element. It +- * takes ownership of the element: In case of failure, it is free and the SVQ +- * is considered broken. ++ * takes ownership of the element: In case of failure not ENOSPC, it is free. 
++ * ++ * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full + */ +-static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) ++static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + { + unsigned qemu_head; +- bool ok = vhost_svq_add_split(svq, elem, &qemu_head); ++ unsigned ndescs = elem->in_num + elem->out_num; ++ bool ok; ++ ++ if (unlikely(ndescs > vhost_svq_available_slots(svq))) { ++ return -ENOSPC; ++ } ++ ++ ok = vhost_svq_add_split(svq, elem, &qemu_head); + if (unlikely(!ok)) { + g_free(elem); +- return false; ++ return -EINVAL; + } + + svq->ring_id_maps[qemu_head] = elem; + vhost_svq_kick(svq); +- return true; ++ return 0; + } + + /** +@@ -274,7 +282,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + + while (true) { + VirtQueueElement *elem; +- bool ok; ++ int r; + + if (svq->next_guest_avail_elem) { + elem = g_steal_pointer(&svq->next_guest_avail_elem); +@@ -286,25 +294,24 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + break; + } + +- if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) { +- /* +- * This condition is possible since a contiguous buffer in GPA +- * does not imply a contiguous buffer in qemu's VA +- * scatter-gather segments. If that happens, the buffer exposed +- * to the device needs to be a chain of descriptors at this +- * moment. +- * +- * SVQ cannot hold more available buffers if we are here: +- * queue the current guest descriptor and ignore further kicks +- * until some elements are used. +- */ +- svq->next_guest_avail_elem = elem; +- return; +- } +- +- ok = vhost_svq_add(svq, elem); +- if (unlikely(!ok)) { +- /* VQ is broken, just return and ignore any other kicks */ ++ r = vhost_svq_add(svq, elem); ++ if (unlikely(r != 0)) { ++ if (r == -ENOSPC) { ++ /* ++ * This condition is possible since a contiguous buffer in ++ * GPA does not imply a contiguous buffer in qemu's VA ++ * scatter-gather segments. 
If that happens, the buffer ++ * exposed to the device needs to be a chain of descriptors ++ * at this moment. ++ * ++ * SVQ cannot hold more available buffers if we are here: ++ * queue the current guest descriptor and ignore kicks ++ * until some elements are used. ++ */ ++ svq->next_guest_avail_elem = elem; ++ } ++ ++ /* VQ is full or broken, just return and ignore kicks */ + return; + } + } +-- +2.31.1 + diff --git a/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch b/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch new file mode 100644 index 0000000..6755aad --- /dev/null +++ b/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch @@ -0,0 +1,138 @@ +From 5c8de23e185a1a1f0b19eac3c9fa03411c9f545c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 14/32] vhost: Decouple vhost_svq_add from VirtQueueElement +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [14/27] 463087dd316adc91b9c7a4e6634c6fc1745c1849 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 1f46ae65d85f677b660bda46685dd3e94885a7cb +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:33 2022 +0200 + + vhost: Decouple vhost_svq_add from VirtQueueElement + + VirtQueueElement comes from the guest, but we're heading SVQ to be able + to modify the element presented to the device without the guest's + knowledge. + + To do so, make SVQ accept sg buffers directly, instead of using + VirtQueueElement. + + Add vhost_svq_add_element to maintain element convenience. + + Signed-off-by: Eugenio Pérez + Acked-by: Jason Wang + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 33 ++++++++++++++++++++---------- + 1 file changed, 22 insertions(+), 11 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 1d2bab287b..3cec03d709 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -172,30 +172,31 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + } + + static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, +- VirtQueueElement *elem, unsigned *head) ++ const struct iovec *out_sg, size_t out_num, ++ const struct iovec *in_sg, size_t in_num, ++ unsigned *head) + { + unsigned avail_idx; + vring_avail_t *avail = svq->vring.avail; + bool ok; +- g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num)); ++ g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num)); + + *head = svq->free_head; + + /* We need some descriptors here */ +- if (unlikely(!elem->out_num && !elem->in_num)) { ++ if (unlikely(!out_num && !in_num)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Guest provided element with no descriptors"); + return false; + } + +- ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, +- elem->in_num > 0, false); ++ ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0, ++ false); + if (unlikely(!ok)) { + return false; + } + +- ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, +- true); ++ ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true); + if (unlikely(!ok)) { + return false; + } +@@ -237,17 +238,19 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) + * + * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full + */ +-static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) ++static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, ++ size_t out_num, 
const struct iovec *in_sg, ++ size_t in_num, VirtQueueElement *elem) + { + unsigned qemu_head; +- unsigned ndescs = elem->in_num + elem->out_num; ++ unsigned ndescs = in_num + out_num; + bool ok; + + if (unlikely(ndescs > vhost_svq_available_slots(svq))) { + return -ENOSPC; + } + +- ok = vhost_svq_add_split(svq, elem, &qemu_head); ++ ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); + if (unlikely(!ok)) { + g_free(elem); + return -EINVAL; +@@ -258,6 +261,14 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + return 0; + } + ++/* Convenience wrapper to add a guest's element to SVQ */ ++static int vhost_svq_add_element(VhostShadowVirtqueue *svq, ++ VirtQueueElement *elem) ++{ ++ return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg, ++ elem->in_num, elem); ++} ++ + /** + * Forward available buffers. + * +@@ -294,7 +305,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + break; + } + +- r = vhost_svq_add(svq, elem); ++ r = vhost_svq_add_element(svq, elem); + if (unlikely(r != 0)) { + if (r == -ENOSPC) { + /* +-- +2.31.1 + diff --git a/kvm-vhost-Expose-vhost_svq_add.patch b/kvm-vhost-Expose-vhost_svq_add.patch new file mode 100644 index 0000000..70dc774 --- /dev/null +++ b/kvm-vhost-Expose-vhost_svq_add.patch @@ -0,0 +1,73 @@ +From cefd6583a8483c7a80f9cde8f7ad4705983af9e7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 18/32] vhost: Expose vhost_svq_add +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [18/27] bfb44f597d350336113783bcc9b3c9d9d32ff8c0 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git 
+ +commit d0291f3f284d3bc220cdb13b0d8ac8a44eb5fd4c +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:37 2022 +0200 + + vhost: Expose vhost_svq_add + + This allows external parts of SVQ to forward custom buffers to the + device. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 6 +++--- + hw/virtio/vhost-shadow-virtqueue.h | 3 +++ + 2 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 1ce52d5b4a..cb879e7b88 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -238,9 +238,9 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) + * + * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full + */ +-static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, +- size_t out_num, const struct iovec *in_sg, +- size_t in_num, VirtQueueElement *elem) ++int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, ++ size_t out_num, const struct iovec *in_sg, size_t in_num, ++ VirtQueueElement *elem) + { + unsigned qemu_head; + unsigned ndescs = in_num + out_num; +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index d9fc1f1799..dd78f4bec2 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -86,6 +86,9 @@ bool vhost_svq_valid_features(uint64_t features, Error **errp); + + void vhost_svq_push_elem(VhostShadowVirtqueue *svq, + const VirtQueueElement *elem, uint32_t len); ++int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, ++ size_t out_num, const struct iovec *in_sg, size_t in_num, ++ VirtQueueElement *elem); + + void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); + void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); +-- +2.31.1 + diff --git 
a/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch b/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch new file mode 100644 index 0000000..f149c05 --- /dev/null +++ b/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch @@ -0,0 +1,83 @@ +From 793d6d56190397624efdcaf6e0112bd12e39c05d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:25:01 +0200 +Subject: [PATCH 02/32] vhost: Fix device's used descriptor dequeue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [2/27] b92803a0681c94c65d243dd07424522387594760 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 81abfa5724c9a6502d7a1d3a67c55f2a303a1170 +Author: Eugenio Pérez +Date: Thu May 12 19:57:43 2022 +0200 + + vhost: Fix device's used descriptor dequeue + + Only the first one of them was properly enqueued back. + + Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding") + + Signed-off-by: Eugenio Pérez + Message-Id: <20220512175747.142058-3-eperezma@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S.
Tsirkin + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 3155801f50..31fc50907d 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -334,12 +334,22 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) + svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); + } + ++static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq, ++ uint16_t num, uint16_t i) ++{ ++ for (uint16_t j = 0; j < (num - 1); ++j) { ++ i = le16_to_cpu(svq->desc_next[i]); ++ } ++ ++ return i; ++} ++ + static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + uint32_t *len) + { + const vring_used_t *used = svq->vring.used; + vring_used_elem_t used_elem; +- uint16_t last_used; ++ uint16_t last_used, last_used_chain, num; + + if (!vhost_svq_more_used(svq)) { + return NULL; +@@ -365,7 +375,10 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- svq->desc_next[used_elem.id] = svq->free_head; ++ num = svq->ring_id_maps[used_elem.id]->in_num + ++ svq->ring_id_maps[used_elem.id]->out_num; ++ last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); ++ svq->desc_next[last_used_chain] = svq->free_head; + svq->free_head = used_elem.id; + + *len = used_elem.len; +-- +2.31.1 + diff --git a/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch b/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch new file mode 100644 index 0000000..51eb700 --- /dev/null +++ b/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch @@ -0,0 +1,68 @@ +From aa99cf129923e0203c0caeb3b4e94a0eb973746f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:36:38 +0200 +Subject: [PATCH 04/32] vhost: Fix element in vhost_svq_add failure +MIME-Version: 
1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [4/27] 96689c99a47dd49591c0d126cb1fbb975b2f79b4 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 5181db132b587754dda3a520eec923b87a65bbb7 +Author: Eugenio Pérez +Date: Thu May 12 19:57:47 2022 +0200 + + vhost: Fix element in vhost_svq_add failure + + Coverity rightly reports that is not free in that case. + + Fixes: Coverity CID 1487559 + Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding") + + Signed-off-by: Eugenio Pérez + Message-Id: <20220512175747.142058-7-eperezma@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 31fc50907d..06d0bb39d9 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -199,11 +199,19 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, + return true; + } + ++/** ++ * Add an element to a SVQ. ++ * ++ * The caller must check that there is enough slots for the new element. It ++ * takes ownership of the element: In case of failure, it is free and the SVQ ++ * is considered broken. 
++ */ + static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + { + unsigned qemu_head; + bool ok = vhost_svq_add_split(svq, elem, &qemu_head); + if (unlikely(!ok)) { ++ g_free(elem); + return false; + } + +-- +2.31.1 + diff --git a/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch b/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch new file mode 100644 index 0000000..513d7b4 --- /dev/null +++ b/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch @@ -0,0 +1,61 @@ +From 3a944d8cd3d35b2398ff68d9ed8ea51d27dfab3c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 12/32] vhost: Move vhost_svq_kick call to vhost_svq_add +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [12/27] 29a7e1fb4992c4beca1e9a3379bb4c8a0f567459 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 98b5adef8493a2bfad6655cfee84299e88bedbf7 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:31 2022 +0200 + + vhost: Move vhost_svq_kick call to vhost_svq_add + + The series needs to expose vhost_svq_add with full functionality, + including kick + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 05cd39d1eb..e3fc3c2658 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -246,6 +246,7 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + } + + svq->ring_id_maps[qemu_head] = elem; ++ vhost_svq_kick(svq); + return true; + } + +@@ -306,7 +307,6 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + /* VQ is broken, just return and ignore any other kicks */ + return; + } +- vhost_svq_kick(svq); + } + + virtio_queue_set_notification(svq->vq, true); +-- +2.31.1 + diff --git a/kvm-vhost-Reorder-vhost_svq_kick.patch b/kvm-vhost-Reorder-vhost_svq_kick.patch new file mode 100644 index 0000000..f61f3c3 --- /dev/null +++ b/kvm-vhost-Reorder-vhost_svq_kick.patch @@ -0,0 +1,88 @@ +From fdbf66e4c70de16ab36d70ea591322b1b24df591 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 11/32] vhost: Reorder vhost_svq_kick +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [11/27] 1d08b97eb3960a0f85f2dd48c3331b803f7ea205 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit d93a2405ca6efa9dc1c420cee5a34bd8242818d0 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:30 2022 +0200 + + vhost: Reorder vhost_svq_kick + + Future code needs to call it from vhost_svq_add. + + No functional change intended. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 9c46c3a8fa..05cd39d1eb 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -215,6 +215,20 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, + return true; + } + ++static void vhost_svq_kick(VhostShadowVirtqueue *svq) ++{ ++ /* ++ * We need to expose the available array entries before checking the used ++ * flags ++ */ ++ smp_mb(); ++ if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { ++ return; ++ } ++ ++ event_notifier_set(&svq->hdev_kick); ++} ++ + /** + * Add an element to a SVQ. + * +@@ -235,20 +249,6 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + return true; + } + +-static void vhost_svq_kick(VhostShadowVirtqueue *svq) +-{ +- /* +- * We need to expose the available array entries before checking the used +- * flags +- */ +- smp_mb(); +- if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { +- return; +- } +- +- event_notifier_set(&svq->hdev_kick); +-} +- + /** + * Forward available buffers. 
+ * +-- +2.31.1 + diff --git a/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch b/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch new file mode 100644 index 0000000..31bfccc --- /dev/null +++ b/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch @@ -0,0 +1,123 @@ +From 486647551223cc01f4dba87197030bbf4e674f0f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:24:48 +0200 +Subject: [PATCH 01/32] vhost: Track descriptor chain in private at SVQ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [1/27] 26d16dc383e3064ac6e4288d5c52b39fee0ad204 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 495fe3a78749c39c0e772c4e1a55d6cb8a7e5292 +Author: Eugenio Pérez +Date: Thu May 12 19:57:42 2022 +0200 + + vhost: Track descriptor chain in private at SVQ + + The device could have access to modify them, and it definitely has + access when we implement packed vq. Harden SVQ by maintaining a private + copy of the descriptor chain. Other fields like buffer addresses are + already maintained separately. + + Signed-off-by: Eugenio Pérez + Message-Id: <20220512175747.142058-2-eperezma@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S.
Tsirkin + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 12 +++++++----- + hw/virtio/vhost-shadow-virtqueue.h | 6 ++++++ + 2 files changed, 13 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index b232803d1b..3155801f50 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -138,6 +138,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + for (n = 0; n < num; n++) { + if (more_descs || (n + 1 < num)) { + descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT); ++ descs[i].next = cpu_to_le16(svq->desc_next[i]); + } else { + descs[i].flags = flags; + } +@@ -145,10 +146,10 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + descs[i].len = cpu_to_le32(iovec[n].iov_len); + + last = i; +- i = cpu_to_le16(descs[i].next); ++ i = cpu_to_le16(svq->desc_next[i]); + } + +- svq->free_head = le16_to_cpu(descs[last].next); ++ svq->free_head = le16_to_cpu(svq->desc_next[last]); + } + + static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, +@@ -336,7 +337,6 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) + static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + uint32_t *len) + { +- vring_desc_t *descs = svq->vring.desc; + const vring_used_t *used = svq->vring.used; + vring_used_elem_t used_elem; + uint16_t last_used; +@@ -365,7 +365,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- descs[used_elem.id].next = svq->free_head; ++ svq->desc_next[used_elem.id] = svq->free_head; + svq->free_head = used_elem.id; + + *len = used_elem.len; +@@ -540,8 +540,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); + memset(svq->vring.used, 0, device_size); + svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); 
++ svq->desc_next = g_new0(uint16_t, svq->vring.num); + for (unsigned i = 0; i < svq->vring.num - 1; i++) { +- svq->vring.desc[i].next = cpu_to_le16(i + 1); ++ svq->desc_next[i] = cpu_to_le16(i + 1); + } + } + +@@ -574,6 +575,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + virtqueue_detach_element(svq->vq, next_avail_elem, 0); + } + svq->vq = NULL; ++ g_free(svq->desc_next); + g_free(svq->ring_id_maps); + qemu_vfree(svq->vring.desc); + qemu_vfree(svq->vring.used); +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index e5e24c536d..c132c994e9 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -53,6 +53,12 @@ typedef struct VhostShadowVirtqueue { + /* Next VirtQueue element that guest made available */ + VirtQueueElement *next_guest_avail_elem; + ++ /* ++ * Backup next field for each descriptor so we can recover securely, not ++ * needing to trust the device access. ++ */ ++ uint16_t *desc_next; ++ + /* Next head to expose to the device */ + uint16_t shadow_avail_idx; + +-- +2.31.1 + diff --git a/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch b/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch new file mode 100644 index 0000000..6a2e147 --- /dev/null +++ b/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch @@ -0,0 +1,81 @@ +From 24b8cf88f53f9fc7cb393c9cad908f759980bfee Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 16/32] vhost: Track number of descs in SVQDescState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [16/27] 26f30cb6dd35c1eb1ddabe25113431bed3d744aa (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream 
Status: git://git.qemu.org/qemu.git + +commit ac4cfdc6f39c06732d27554523f9d5f8a53b4ffa +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:35 2022 +0200 + + vhost: Track number of descs in SVQDescState + + A guest's buffer contiguous on GPA may need multiple descriptors on + qemu's VA, so SVQ should track its length separately. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- + hw/virtio/vhost-shadow-virtqueue.h | 6 ++++++ + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index a08e3d4025..4d99075e73 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -257,6 +257,7 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, + } + + svq->desc_state[qemu_head].elem = elem; ++ svq->desc_state[qemu_head].ndescs = ndescs; + vhost_svq_kick(svq); + return 0; + } +@@ -418,8 +419,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- num = svq->desc_state[used_elem.id].elem->in_num + +- svq->desc_state[used_elem.id].elem->out_num; ++ num = svq->desc_state[used_elem.id].ndescs; + last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); + svq->desc_next[last_used_chain] = svq->free_head; + svq->free_head = used_elem.id; +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index d646c35054..5c7e7cbab6 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -17,6 +17,12 @@ + + typedef struct SVQDescState { + VirtQueueElement *elem; ++ ++ /* ++ * Number of descriptors exposed to the device.
May or may not match ++ * guest's ++ */ ++ unsigned int ndescs; + } SVQDescState; + + /* Shadow virtqueue to relay notifications */ +-- +2.31.1 + diff --git a/kvm-vhost-add-vhost_svq_poll.patch b/kvm-vhost-add-vhost_svq_poll.patch new file mode 100644 index 0000000..fa27e5e --- /dev/null +++ b/kvm-vhost-add-vhost_svq_poll.patch @@ -0,0 +1,92 @@ +From 0ab3da1092362470d256b433c546bd365d34f930 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 19/32] vhost: add vhost_svq_poll +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [19/27] 6807bb0bb6e5183b46a03b12b4027c7d767e8555 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 3f44d13dda83d390cc9563e56e7d337e4f6223f4 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:38 2022 +0200 + + vhost: add vhost_svq_poll + + It allows the Shadow Control VirtQueue to wait for the device to use the + available buffers. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 27 +++++++++++++++++++++++++++ + hw/virtio/vhost-shadow-virtqueue.h | 1 + + 2 files changed, 28 insertions(+) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index cb879e7b88..95d0d7a7ee 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -485,6 +485,33 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, + } while (!vhost_svq_enable_notification(svq)); + } + ++/** ++ * Poll the SVQ for one device used buffer. 
++ * ++ * This function race with main event loop SVQ polling, so extra ++ * synchronization is needed. ++ * ++ * Return the length written by the device. ++ */ ++size_t vhost_svq_poll(VhostShadowVirtqueue *svq) ++{ ++ int64_t start_us = g_get_monotonic_time(); ++ do { ++ uint32_t len; ++ VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); ++ if (elem) { ++ return len; ++ } ++ ++ if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { ++ return 0; ++ } ++ ++ /* Make sure we read new used_idx */ ++ smp_rmb(); ++ } while (true); ++} ++ + /** + * Forward used buffers. + * +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index dd78f4bec2..cf442f7dea 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -89,6 +89,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq, + int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, + size_t out_num, const struct iovec *in_sg, size_t in_num, + VirtQueueElement *elem); ++size_t vhost_svq_poll(VhostShadowVirtqueue *svq); + + void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); + void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); +-- +2.31.1 + diff --git a/kvm-vhost-add-vhost_svq_push_elem.patch b/kvm-vhost-add-vhost_svq_push_elem.patch new file mode 100644 index 0000000..2a9ec40 --- /dev/null +++ b/kvm-vhost-add-vhost_svq_push_elem.patch @@ -0,0 +1,83 @@ +From a26eb02b3a49c5d1163685ba5b83b67138c09047 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 17/32] vhost: add vhost_svq_push_elem +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [17/27] d064b40a262f2dfdc9f648d250aa8c8020c40385 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: 
Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 432efd144e990b6e040862de25f8f0b6a6eeb03d +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:36 2022 +0200 + + vhost: add vhost_svq_push_elem + + This function allows external SVQ users to return guest's available + buffers. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++++++++++ + hw/virtio/vhost-shadow-virtqueue.h | 3 +++ + 2 files changed, 19 insertions(+) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 4d99075e73..1ce52d5b4a 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -428,6 +428,22 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return g_steal_pointer(&svq->desc_state[used_elem.id].elem); + } + ++/** ++ * Push an element to SVQ, returning it to the guest. ++ */ ++void vhost_svq_push_elem(VhostShadowVirtqueue *svq, ++ const VirtQueueElement *elem, uint32_t len) ++{ ++ virtqueue_push(svq->vq, elem, len); ++ if (svq->next_guest_avail_elem) { ++ /* ++ * Avail ring was full when vhost_svq_flush was called, so it's a ++ * good moment to make more descriptors available if possible. 
++ */ ++ vhost_handle_guest_kick(svq); ++ } ++} ++ + static void vhost_svq_flush(VhostShadowVirtqueue *svq, + bool check_for_avail_queue) + { +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index 5c7e7cbab6..d9fc1f1799 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -84,6 +84,9 @@ typedef struct VhostShadowVirtqueue { + + bool vhost_svq_valid_features(uint64_t features, Error **errp); + ++void vhost_svq_push_elem(VhostShadowVirtqueue *svq, ++ const VirtQueueElement *elem, uint32_t len); ++ + void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); + void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); + void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, +-- +2.31.1 + diff --git a/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch b/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch new file mode 100644 index 0000000..08bcaf2 --- /dev/null +++ b/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch @@ -0,0 +1,120 @@ +From 2bdea90bfbce3b8d5bfa86178a942a470b85b835 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 07/32] vhost: move descriptor translation to + vhost_svq_vring_write_descs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [7/27] 5533c72065e4ebf8ea7db966c976a3b29bdafb82 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 009c2549bb9dc7f7061009eb87f2a53d4b364983 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:26 2022 +0200 + + vhost: move descriptor translation to vhost_svq_vring_write_descs + + 
It's done for both in and out descriptors so it's better placed here. + + Acked-by: Jason Wang + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 38 +++++++++++++++++++++--------- + 1 file changed, 27 insertions(+), 11 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 06d0bb39d9..3fbda1e3d4 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -122,17 +122,35 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, + return true; + } + +-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, +- const struct iovec *iovec, size_t num, +- bool more_descs, bool write) ++/** ++ * Write descriptors to SVQ vring ++ * ++ * @svq: The shadow virtqueue ++ * @sg: Cache for hwaddr ++ * @iovec: The iovec from the guest ++ * @num: iovec length ++ * @more_descs: True if more descriptors come in the chain ++ * @write: True if they are writeable descriptors ++ * ++ * Return true if success, false otherwise and print error. ++ */ ++static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, ++ const struct iovec *iovec, size_t num, ++ bool more_descs, bool write) + { + uint16_t i = svq->free_head, last = svq->free_head; + unsigned n; + uint16_t flags = write ? 
cpu_to_le16(VRING_DESC_F_WRITE) : 0; + vring_desc_t *descs = svq->vring.desc; ++ bool ok; + + if (num == 0) { +- return; ++ return true; ++ } ++ ++ ok = vhost_svq_translate_addr(svq, sg, iovec, num); ++ if (unlikely(!ok)) { ++ return false; + } + + for (n = 0; n < num; n++) { +@@ -150,6 +168,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + } + + svq->free_head = le16_to_cpu(svq->desc_next[last]); ++ return true; + } + + static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, +@@ -169,21 +188,18 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, + return false; + } + +- ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num); ++ ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, ++ elem->in_num > 0, false); + if (unlikely(!ok)) { + return false; + } +- vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, +- elem->in_num > 0, false); +- + +- ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num); ++ ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, ++ true); + if (unlikely(!ok)) { + return false; + } + +- vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true); +- + /* + * Put the entry in the available array (but don't update avail->idx until + * they do sync). 
+-- +2.31.1 + diff --git a/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch b/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch new file mode 100644 index 0000000..70e8f59 --- /dev/null +++ b/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch @@ -0,0 +1,56 @@ +From edb2bd99355f300b512c040e91f5870ea14a5d7e Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:15 -0700 +Subject: [PATCH 11/16] vhost-net: fix improper cleanup in vhost_net_start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [4/7] e88e482dd4b344f0cc887a358268beaed4d62917 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +vhost_net_start() missed a corresponding stop_one() upon error from +vhost_set_vring_enable(). While at it, make the error handling for +err_start more robust. No real issue was found due to this though. + +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-5-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 6f3910b5eee00b8cc959e94659c0d524c482a418) +Signed-off-by: Jason Wang +--- + hw/net/vhost_net.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 30379d2ca4..d6d7c51f62 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -381,6 +381,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, + r = vhost_set_vring_enable(peer, peer->vring_enable); + + if (r < 0) { ++ vhost_net_stop_one(get_vhost_net(peer), dev); + goto err_start; + } + } +@@ -390,7 +391,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, + + err_start: + while (--i >= 0) { +- peer = qemu_get_peer(ncs , i); ++ peer = qemu_get_peer(ncs, i < data_queue_pairs ? ++ i : n->max_queue_pairs); + vhost_net_stop_one(get_vhost_net(peer), dev); + } + e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); +-- +2.31.1 + diff --git a/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch b/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch new file mode 100644 index 0000000..31677fd --- /dev/null +++ b/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch @@ -0,0 +1,87 @@ +From a9095850da8dd4ea3fdb725cb7f79118144e22fa Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:39:27 +0200 +Subject: [PATCH 22/32] vhost-net-vdpa: add stubs for when no virtio-net device + is present +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [22/27] a2b25a805bb06094a5fab27ce8f82bee12a9fcb5 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 94c643732dc110d04bbdf0eb43c41bce23b3593e +Author: Eugenio 
Pérez +Date: Wed Jul 20 08:59:41 2022 +0200 + + vhost-net-vdpa: add stubs for when no virtio-net device is present + + net/vhost-vdpa.c will need functions that are declared in + vhost-shadow-virtqueue.c, that needs functions of virtio-net.c. + + Copy the vhost-vdpa-stub.c code so + only the constructor net_init_vhost_vdpa needs to be defined. + + Signed-off-by: Eugenio Pérez + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + net/meson.build | 3 ++- + net/vhost-vdpa-stub.c | 21 +++++++++++++++++++++ + 2 files changed, 23 insertions(+), 1 deletion(-) + create mode 100644 net/vhost-vdpa-stub.c + +diff --git a/net/meson.build b/net/meson.build +index c965e83b26..116a9e7cbb 100644 +--- a/net/meson.build ++++ b/net/meson.build +@@ -41,7 +41,8 @@ endif + softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix)) + softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c')) + if have_vhost_net_vdpa +- softmmu_ss.add(files('vhost-vdpa.c')) ++ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-vdpa.c'), if_false: files('vhost-vdpa-stub.c')) ++ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-vdpa-stub.c')) + endif + + subdir('can') +diff --git a/net/vhost-vdpa-stub.c b/net/vhost-vdpa-stub.c +new file mode 100644 +index 0000000000..1732ed2443 +--- /dev/null ++++ b/net/vhost-vdpa-stub.c +@@ -0,0 +1,21 @@ ++/* ++ * vhost-vdpa-stub.c ++ * ++ * Copyright (c) 2022 Red Hat, Inc. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ * ++ */ ++ ++#include "qemu/osdep.h" ++#include "clients.h" ++#include "net/vhost-vdpa.h" ++#include "qapi/error.h" ++ ++int net_init_vhost_vdpa(const Netdev *netdev, const char *name, ++ NetClientState *peer, Error **errp) ++{ ++ error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*"); ++ return -1; ++} +-- +2.31.1 + diff --git a/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch b/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch new file mode 100644 index 0000000..747bf5f --- /dev/null +++ b/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch @@ -0,0 +1,58 @@ +From 46c5a35aa56cf0dd55376638dbf7d46e85f497e1 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:16 -0700 +Subject: [PATCH 12/16] vhost-vdpa: backend feature should set only once +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [5/7] 7531bb8da0c99b29997e8bfc6d1e811daf3cdd38 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +The vhost_vdpa_one_time_request() branch in +vhost_vdpa_set_backend_cap() incorrectly sends down +ioctls on vhost_dev with non-zero index. This may +end up with multiple VHOST_SET_BACKEND_FEATURES +ioctl calls sent down on the vhost-vdpa fd that is +shared between all these vhost_dev's. + +To fix it, send down ioctl only once via the first +vhost_dev with index 0. Toggle the polarity of the +vhost_vdpa_one_time_request() test should do the +trick. + +Fixes: 4d191cfdc7de ("vhost-vdpa: classify one time request") +Signed-off-by: Si-Wei Liu +Reviewed-by: Stefano Garzarella +Acked-by: Jason Wang +Acked-by: Eugenio Pérez +Message-Id: <1651890498-24478-6-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 6aee7e4233f6467f69531fcd352adff028f3f5ea) +Signed-off-by: Jason Wang +--- + hw/virtio/vhost-vdpa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 8adf7c0b92..6e3dbd9e89 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -665,7 +665,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + + features &= f; + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_one_time_request(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; +-- +2.31.1 + diff --git a/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch b/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch new file mode 100644 index 0000000..2466557 --- /dev/null +++ b/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch @@ -0,0 +1,123 @@ +From 58acdab17ec00ab76105ab92a51c5ba4dec3df5a Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:17 -0700 +Subject: [PATCH 13/16] vhost-vdpa: change name and polarity for + vhost_vdpa_one_time_request() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [6/7] 7029778f463a136ff412c63b86b6953390e47bf8 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +The name vhost_vdpa_one_time_request() was confusing. No +matter whatever it returns, its typical occurrence had +always been at requests that only need to be applied once. +And the name didn't suggest what it actually checks for. +Change it to vhost_vdpa_first_dev() with polarity flipped +for better readability of code. That way it is able to +reflect what the check is really about.
+ +This call is applicable to requests which perform an operation +only once, before queues are set up, and usually at the beginning +of the caller function. Document the requirement for it in place. + +Signed-off-by: Si-Wei Liu +Message-Id: <1651890498-24478-7-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Stefano Garzarella +Acked-by: Jason Wang +(cherry picked from commit d71b0609fc04217e28d17009f04d74b08be6f466) +Signed-off-by: Jason Wang +--- + hw/virtio/vhost-vdpa.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 6e3dbd9e89..33dcaa135e 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -366,11 +366,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) + v->iova_range.last); + } + +-static bool vhost_vdpa_one_time_request(struct vhost_dev *dev) ++/* ++ * The use of this function is for requests that only need to be ++ * applied once. Typically such request occurs at the beginning ++ * of operation, and before setting up queues. It should not be ++ * used for request that performs operation until all queues are ++ * set, which would need to check dev->vq_index_end instead.
++ */ ++static bool vhost_vdpa_first_dev(struct vhost_dev *dev) + { + struct vhost_vdpa *v = dev->opaque; + +- return v->index != 0; ++ return v->index == 0; + } + + static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, +@@ -451,7 +458,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) + + vhost_vdpa_get_iova_range(v); + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -594,7 +601,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) + static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, + struct vhost_memory *mem) + { +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -623,7 +630,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, + struct vhost_vdpa *v = dev->opaque; + int ret; + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -665,7 +672,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + + features &= f; + +- if (!vhost_vdpa_one_time_request(dev)) { ++ if (vhost_vdpa_first_dev(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; +@@ -1118,7 +1125,7 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log) + { + struct vhost_vdpa *v = dev->opaque; +- if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) { ++ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -1240,7 +1247,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, + + static int vhost_vdpa_set_owner(struct vhost_dev *dev) + { +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +-- +2.31.1 + diff --git a/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch b/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch new file mode 100644 index 0000000..7716cbf --- 
/dev/null +++ b/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch @@ -0,0 +1,48 @@ +From 3142102adb98f46518c0ac1773b0c48710c6bed6 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:14 -0700 +Subject: [PATCH 10/16] vhost-vdpa: fix improper cleanup in net_init_vhost_vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [3/7] c83ff6c97d34cfae3c3447edde934b42a9ace75f (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +... such that no memory leaks on dangling net clients in case of +error. + +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-4-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 9bd055073e375c8a0d7ebce925e05d914d69fc7f) +Signed-off-by: Jason Wang +--- + net/vhost-vdpa.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 1e9fe47c03..df1e69ee72 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -306,7 +306,9 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + + err: + if (i) { +- qemu_del_net_client(ncs[0]); ++ for (i--; i >= 0; i--) { ++ qemu_del_net_client(ncs[i]); ++ } + } + qemu_close(vdpa_device_fd); + +-- +2.31.1 + diff --git a/kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch b/kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch new file mode 100644 index 0000000..50013c9 --- /dev/null +++ b/kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch @@ -0,0 +1,76 @@ +From ff4e95d8652dadfed09913c7968514a2a7f36591 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Thu, 14 Apr 2022 10:38:26 -0400 +Subject: [PATCH 2/2] vhost-vsock: detach the virqueue element in case of error + 
+RH-Author: Jon Maloy +RH-MergeRequest: 153: vhost-vsock: detach the virqueue element in case of error +RH-Commit: [1/1] 024dbc9073fddbe89a8ae8eb201f5bc674bffb64 (jmaloy/qemu-kvm) +RH-Bugzilla: 2063262 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2063262 +Upstream: Merged +CVE: CVE-2022-26354 + +commit 8d1b247f3748ac4078524130c6d7ae42b6140aaf +Author: Stefano Garzarella +Date: Mon Feb 28 10:50:58 2022 +0100 + + vhost-vsock: detach the virqueue element in case of error + + In vhost_vsock_common_send_transport_reset(), if an element popped from + the virtqueue is invalid, we should call virtqueue_detach_element() to + detach it from the virtqueue before freeing its memory. + + Fixes: fc0b9b0e1c ("vhost-vsock: add virtio sockets device") + Fixes: CVE-2022-26354 + Cc: qemu-stable@nongnu.org + Reported-by: VictorV + Signed-off-by: Stefano Garzarella + Message-Id: <20220228095058.27899-1-sgarzare@redhat.com> + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit 8d1b247f3748ac4078524130c6d7ae42b6140aaf) +Signed-off-by: Jon Maloy +--- + hw/virtio/vhost-vsock-common.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c +index 3f3771274e..ed706681ac 100644 +--- a/hw/virtio/vhost-vsock-common.c ++++ b/hw/virtio/vhost-vsock-common.c +@@ -153,19 +153,23 @@ static void vhost_vsock_common_send_transport_reset(VHostVSockCommon *vvc) + if (elem->out_num) { + error_report("invalid vhost-vsock event virtqueue element with " + "out buffers"); +- goto out; ++ goto err; + } + + if (iov_from_buf(elem->in_sg, elem->in_num, 0, + &event, sizeof(event)) != sizeof(event)) { + error_report("vhost-vsock event virtqueue element is too short"); +- goto out; ++ goto err; + } + + virtqueue_push(vq, elem, sizeof(event)); + virtio_notify(VIRTIO_DEVICE(vvc), vq); + +-out: ++ g_free(elem); ++ return; ++ ++err: ++ virtqueue_detach_element(vq, elem, 0); + g_free(elem); + } + +-- +2.27.0 + diff --git a/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch b/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch new file mode 100644 index 0000000..2a72cc7 --- /dev/null +++ b/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch @@ -0,0 +1,46 @@ +From 643d9c28ff8b15c333cc748c5e712659ad2a257c Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 13 Jun 2022 14:10:10 +0800 +Subject: [PATCH 03/17] virtio-iommu: Add an assert check in translate routine + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [3/5] 19f309fd0beda40d65f51c454e37936658ac9f38 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +With address space switch supported, dma access translation only +happen after endpoint is attached 
to a non-bypass domain. + +Signed-off-by: Zhenzhong Duan +Message-Id: <20220613061010.2674054-4-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 23b5f0ff6d923d3bca11cf44eed3daf7a0a836a8) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 440a1c28a7..e970d4d5a6 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -866,6 +866,10 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, + qemu_rec_mutex_lock(&s->mutex); + + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); ++ ++ if (bypass_allowed) ++ assert(ep && ep->domain && !ep->domain->bypass); ++ + if (!ep) { + if (!bypass_allowed) { + error_report_once("%s sid=%d is not known!!", __func__, sid); +-- +2.31.1 + diff --git a/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch b/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch new file mode 100644 index 0000000..3352666 --- /dev/null +++ b/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch @@ -0,0 +1,250 @@ +From d60774ee3168eefb21a4120a38107cd36ae17e07 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 13 Jun 2022 14:10:08 +0800 +Subject: [PATCH 01/17] virtio-iommu: Add bypass mode support to assigned + device + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [1/5] 4777815533b31c7f4f09af8902e378fd3fc1186a (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +Currently assigned devices can not work in virtio-iommu bypass mode. +Guest driver fails to probe the device due to DMA failure. And the +reason is because of lacking GPA -> HPA mappings when VM is created. 
+ +Add a root container memory region to hold both bypass memory region +and iommu memory region, so the switch between them is supported +just like the implementation in virtual VT-d. + +Signed-off-by: Zhenzhong Duan +Message-Id: <20220613061010.2674054-2-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 90519b90539b16258d1d52b908b199f44877dc18) +Signed-off-by: Eric Auger +--- + hw/virtio/trace-events | 1 + + hw/virtio/virtio-iommu.c | 115 ++++++++++++++++++++++++++++++- + include/hw/virtio/virtio-iommu.h | 2 + + 3 files changed, 116 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index a5102eac9e..2ab5881b88 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -114,6 +114,7 @@ virtio_iommu_remap(const char *name, uint64_t virt_start, uint64_t virt_end, uin + virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "mr=%s old_mask=0x%"PRIx64" new_mask=0x%"PRIx64 + virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" + virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" ++virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" + + # virtio-mem.c + virtio_mem_send_response(uint16_t type) "type=%" PRIu16 +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 6d5ea0bdf1..5e99e6c62b 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -70,6 +70,77 @@ static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) + return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); + } + ++static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) ++{ ++ uint32_t sid; ++ bool bypassed; ++ VirtIOIOMMU *s = sdev->viommu; ++ VirtIOIOMMUEndpoint *ep; ++ ++ sid = virtio_iommu_get_bdf(sdev); ++ ++ qemu_mutex_lock(&s->mutex); ++ /* need to check bypass before system 
reset */ ++ if (!s->endpoints) { ++ bypassed = s->config.bypass; ++ goto unlock; ++ } ++ ++ ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); ++ if (!ep || !ep->domain) { ++ bypassed = s->config.bypass; ++ } else { ++ bypassed = ep->domain->bypass; ++ } ++ ++unlock: ++ qemu_mutex_unlock(&s->mutex); ++ return bypassed; ++} ++ ++/* Return whether the device is using IOMMU translation. */ ++static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev) ++{ ++ bool use_remapping; ++ ++ assert(sdev); ++ ++ use_remapping = !virtio_iommu_device_bypassed(sdev); ++ ++ trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus), ++ PCI_SLOT(sdev->devfn), ++ PCI_FUNC(sdev->devfn), ++ use_remapping); ++ ++ /* Turn off first then on the other */ ++ if (use_remapping) { ++ memory_region_set_enabled(&sdev->bypass_mr, false); ++ memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true); ++ } else { ++ memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false); ++ memory_region_set_enabled(&sdev->bypass_mr, true); ++ } ++ ++ return use_remapping; ++} ++ ++static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s) ++{ ++ GHashTableIter iter; ++ IOMMUPciBus *iommu_pci_bus; ++ int i; ++ ++ g_hash_table_iter_init(&iter, s->as_by_busptr); ++ while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) { ++ for (i = 0; i < PCI_DEVFN_MAX; i++) { ++ if (!iommu_pci_bus->pbdev[i]) { ++ continue; ++ } ++ virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]); ++ } ++ } ++} ++ + /** + * The bus number is used for lookup when SID based operations occur. 
+ * In that case we lazily populate the IOMMUPciBus array from the bus hash +@@ -214,6 +285,7 @@ static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value, + static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) + { + VirtIOIOMMUDomain *domain = ep->domain; ++ IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr); + + if (!ep->domain) { + return; +@@ -222,6 +294,7 @@ static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) + ep->iommu_mr); + QLIST_REMOVE(ep, next); + ep->domain = NULL; ++ virtio_iommu_switch_address_space(sdev); + } + + static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s, +@@ -324,12 +397,39 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, + + trace_virtio_iommu_init_iommu_mr(name); + ++ memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX); ++ address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU); ++ ++ /* ++ * Build the IOMMU disabled container with aliases to the ++ * shared MRs. Note that aliasing to a shared memory region ++ * could help the memory API to detect same FlatViews so we ++ * can have devices to share the same FlatView when in bypass ++ * mode. (either by not configuring virtio-iommu driver or with ++ * "iommu=pt"). It will greatly reduce the total number of ++ * FlatViews of the system hence VM runs faster. 
++ */ ++ memory_region_init_alias(&sdev->bypass_mr, OBJECT(s), ++ "system", get_system_memory(), 0, ++ memory_region_size(get_system_memory())); ++ + memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr), + TYPE_VIRTIO_IOMMU_MEMORY_REGION, + OBJECT(s), name, + UINT64_MAX); +- address_space_init(&sdev->as, +- MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU); ++ ++ /* ++ * Hook both the containers under the root container, we ++ * switch between iommu & bypass MRs by enable/disable ++ * corresponding sub-containers ++ */ ++ memory_region_add_subregion_overlap(&sdev->root, 0, ++ MEMORY_REGION(&sdev->iommu_mr), ++ 0); ++ memory_region_add_subregion_overlap(&sdev->root, 0, ++ &sdev->bypass_mr, 0); ++ ++ virtio_iommu_switch_address_space(sdev); + g_free(name); + } + return &sdev->as; +@@ -343,6 +443,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s, + uint32_t flags = le32_to_cpu(req->flags); + VirtIOIOMMUDomain *domain; + VirtIOIOMMUEndpoint *ep; ++ IOMMUDevice *sdev; + + trace_virtio_iommu_attach(domain_id, ep_id); + +@@ -376,6 +477,8 @@ static int virtio_iommu_attach(VirtIOIOMMU *s, + QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next); + + ep->domain = domain; ++ sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr); ++ virtio_iommu_switch_address_space(sdev); + + /* Replay domain mappings on the associated memory region */ + g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb, +@@ -888,6 +991,7 @@ static void virtio_iommu_set_config(VirtIODevice *vdev, + return; + } + dev_config->bypass = in_config->bypass; ++ virtio_iommu_switch_address_space_all(dev); + } + + trace_virtio_iommu_set_config(in_config->bypass); +@@ -1027,6 +1131,8 @@ static void virtio_iommu_system_reset(void *opaque) + * system reset + */ + s->config.bypass = s->boot_bypass; ++ virtio_iommu_switch_address_space_all(s); ++ + } + + static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) +@@ -1043,6 +1149,11 @@ static void virtio_iommu_device_realize(DeviceState 
*dev, Error **errp) + virtio_iommu_handle_command); + s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL); + ++ /* ++ * config.bypass is needed to get initial address space early, such as ++ * in vfio realize ++ */ ++ s->config.bypass = s->boot_bypass; + s->config.page_size_mask = TARGET_PAGE_MASK; + s->config.input_range.end = UINT64_MAX; + s->config.domain_range.end = UINT32_MAX; +diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h +index 84391f8448..102eeefa73 100644 +--- a/include/hw/virtio/virtio-iommu.h ++++ b/include/hw/virtio/virtio-iommu.h +@@ -37,6 +37,8 @@ typedef struct IOMMUDevice { + int devfn; + IOMMUMemoryRegion iommu_mr; + AddressSpace as; ++ MemoryRegion root; /* The root container of the device */ ++ MemoryRegion bypass_mr; /* The alias of shared memory MR */ + } IOMMUDevice; + + typedef struct IOMMUPciBus { +-- +2.31.1 + diff --git a/kvm-virtio-iommu-Fix-migration-regression.patch b/kvm-virtio-iommu-Fix-migration-regression.patch new file mode 100644 index 0000000..f5ae4d6 --- /dev/null +++ b/kvm-virtio-iommu-Fix-migration-regression.patch @@ -0,0 +1,54 @@ +From 8d45902b4884315ec090e607e9f03606b21001cf Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Fri, 24 Jun 2022 17:37:40 +0800 +Subject: [PATCH 05/17] virtio-iommu: Fix migration regression + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [5/5] 9652c4aaaf88e24083fab1fbc3d1423260c93ca6 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +We also need to switch to the right address space on dest side +after loading the device status. DMA to wrong address space is +destructive. 
+ +Fixes: 3facd774962fd ("virtio-iommu: Add bypass mode support to assigned device") +Suggested-by: Eric Auger +Signed-off-by: Zhenzhong Duan +Message-Id: <20220624093740.3525267-1-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Eric Auger +(cherry picked from commit d355566bd958e24e7e384da6ea89a9fc88d7bfed) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 44a041dec9..2012835554 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -1324,6 +1324,14 @@ static int iommu_post_load(void *opaque, int version_id) + VirtIOIOMMU *s = opaque; + + g_tree_foreach(s->domains, reconstruct_endpoints, s); ++ ++ /* ++ * Memory regions are dynamically turned on/off depending on ++ * 'config.bypass' and attached domain type if there is. After ++ * migration, we need to make sure the memory regions are ++ * still correct. 
++ */ ++ virtio_iommu_switch_address_space_all(s); + return 0; + } + +-- +2.31.1 + diff --git a/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch b/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch new file mode 100644 index 0000000..7747bfe --- /dev/null +++ b/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch @@ -0,0 +1,67 @@ +From b681247c29b59af40c86f8f0ae5709138ae9bf1a Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 23 Jun 2022 10:31:52 +0800 +Subject: [PATCH 04/17] virtio-iommu: Fix the partial copy of probe request + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [4/5] c402164414a8e69bbb6df20af3c2b6d2589d6f3e (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +The structure of the probe request doesn't include the tail; this leads +to a few fields not being copied. Currently this isn't an issue, as +the missed fields are reserved, but fix it in case the reserved +fields are used in the future. + +Changed 4th parameter of virtio_iommu_iov_to_req() to receive size +of device-readable part. + +Fixes: 1733eebb9e75b ("virtio-iommu: Implement RESV_MEM probe request") +Signed-off-by: Zhenzhong Duan +Message-Id: <20220623023152.3473231-1-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S.
Tsirkin +Reviewed-by: Jean-Philippe Brucker +Reviewed-by: Eric Auger +(cherry picked from commit 45461aace83d961e933b27519b81d17b4c690514) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index e970d4d5a6..44a041dec9 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -676,11 +676,10 @@ static int virtio_iommu_probe(VirtIOIOMMU *s, + + static int virtio_iommu_iov_to_req(struct iovec *iov, + unsigned int iov_cnt, +- void *req, size_t req_sz) ++ void *req, size_t payload_sz) + { +- size_t sz, payload_sz = req_sz - sizeof(struct virtio_iommu_req_tail); ++ size_t sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz); + +- sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz); + if (unlikely(sz != payload_sz)) { + return VIRTIO_IOMMU_S_INVAL; + } +@@ -693,7 +692,8 @@ static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s, \ + unsigned int iov_cnt) \ + { \ + struct virtio_iommu_req_ ## __req req; \ +- int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); \ ++ int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, \ ++ sizeof(req) - sizeof(struct virtio_iommu_req_tail));\ + \ + return ret ? 
ret : virtio_iommu_ ## __req(s, &req); \ + } +-- +2.31.1 + diff --git a/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch b/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch new file mode 100644 index 0000000..df961b0 --- /dev/null +++ b/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch @@ -0,0 +1,141 @@ +From 881c999e302e7ee1212b47c523a2cf442c549417 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 13 Jun 2022 14:10:09 +0800 +Subject: [PATCH 02/17] virtio-iommu: Use recursive lock to avoid deadlock + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [2/5] 67dce1eecb49555f728f119f8efac00417ff65bf (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +When switching address space with mutex lock hold, mapping will be +replayed for assigned device. This will trigger relock deadlock. + +Also release the mutex resource in unrealize routine. + +Signed-off-by: Zhenzhong Duan +Message-Id: <20220613061010.2674054-3-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 08f2030a2e46f1e93d186b3a683e5caef1df562b) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 20 +++++++++++--------- + include/hw/virtio/virtio-iommu.h | 2 +- + 2 files changed, 12 insertions(+), 10 deletions(-) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 5e99e6c62b..440a1c28a7 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -79,7 +79,7 @@ static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) + + sid = virtio_iommu_get_bdf(sdev); + +- qemu_mutex_lock(&s->mutex); ++ qemu_rec_mutex_lock(&s->mutex); + /* need to check bypass before system reset */ + if (!s->endpoints) { + bypassed = s->config.bypass; +@@ -94,7 +94,7 @@ static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) + } + + unlock: +- qemu_mutex_unlock(&s->mutex); ++ qemu_rec_mutex_unlock(&s->mutex); + return bypassed; + } + +@@ -746,7 +746,7 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) + tail.status = VIRTIO_IOMMU_S_DEVERR; + goto out; + } +- qemu_mutex_lock(&s->mutex); ++ qemu_rec_mutex_lock(&s->mutex); + switch (head.type) { + case VIRTIO_IOMMU_T_ATTACH: + tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt); +@@ -775,7 +775,7 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) + default: + tail.status = VIRTIO_IOMMU_S_UNSUPP; + } +- qemu_mutex_unlock(&s->mutex); ++ qemu_rec_mutex_unlock(&s->mutex); + + out: + sz = iov_from_buf(elem->in_sg, elem->in_num, 0, +@@ -863,7 +863,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, + sid = virtio_iommu_get_bdf(sdev); + + trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag); +- qemu_mutex_lock(&s->mutex); ++ qemu_rec_mutex_lock(&s->mutex); + + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); + if (!ep) { +@@ -947,7 +947,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, + 
trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid); + + unlock: +- qemu_mutex_unlock(&s->mutex); ++ qemu_rec_mutex_unlock(&s->mutex); + return entry; + } + +@@ -1036,7 +1036,7 @@ static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) + + sid = virtio_iommu_get_bdf(sdev); + +- qemu_mutex_lock(&s->mutex); ++ qemu_rec_mutex_lock(&s->mutex); + + if (!s->endpoints) { + goto unlock; +@@ -1050,7 +1050,7 @@ static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) + g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr); + + unlock: +- qemu_mutex_unlock(&s->mutex); ++ qemu_rec_mutex_unlock(&s->mutex); + } + + static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr, +@@ -1169,7 +1169,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) + virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE); + virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG); + +- qemu_mutex_init(&s->mutex); ++ qemu_rec_mutex_init(&s->mutex); + + s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); + +@@ -1197,6 +1197,8 @@ static void virtio_iommu_device_unrealize(DeviceState *dev) + g_tree_destroy(s->endpoints); + } + ++ qemu_rec_mutex_destroy(&s->mutex); ++ + virtio_delete_queue(s->req_vq); + virtio_delete_queue(s->event_vq); + virtio_cleanup(vdev); +diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h +index 102eeefa73..2ad5ee320b 100644 +--- a/include/hw/virtio/virtio-iommu.h ++++ b/include/hw/virtio/virtio-iommu.h +@@ -58,7 +58,7 @@ struct VirtIOIOMMU { + ReservedRegion *reserved_regions; + uint32_t nb_reserved_regions; + GTree *domains; +- QemuMutex mutex; ++ QemuRecMutex mutex; + GTree *endpoints; + bool boot_bypass; + }; +-- +2.31.1 + diff --git a/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch b/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch new file mode 100644 index 0000000..4ae4cc4 --- /dev/null +++ b/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch @@ 
-0,0 +1,69 @@ +From dffe24d5c1f5a4676e9d2a5bc032effd420b008f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 08/32] virtio-net: Expose MAC_TABLE_ENTRIES +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [8/27] 5c3b96215ddf853cafc594da47f57d7e157db4ee (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 6758c01f054c2a842d41d927d628b09f649d3254 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:27 2022 +0200 + + virtio-net: Expose MAC_TABLE_ENTRIES + + vhost-vdpa control virtqueue needs to know the maximum entries supported + by the virtio-net device, so we know if it is possible to apply the + filter. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/net/virtio-net.c | 1 - + include/hw/virtio/virtio-net.h | 3 +++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 633de61513..2a127f0a3b 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -49,7 +49,6 @@ + + #define VIRTIO_NET_VM_VERSION 11 + +-#define MAC_TABLE_ENTRIES 64 + #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ + + /* previously fixed value */ +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index eb87032627..cce1c554f7 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -35,6 +35,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET) + * and latency. */ + #define TX_BURST 256 + ++/* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. 
*/ ++#define MAC_TABLE_ENTRIES 64 ++ + typedef struct virtio_net_conf + { + uint32_t txtimer; +-- +2.31.1 + diff --git a/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch b/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch new file mode 100644 index 0000000..b4b9012 --- /dev/null +++ b/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch @@ -0,0 +1,169 @@ +From 49e91b34b62f5da147fa2fb80d203dd675c48f64 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 09/32] virtio-net: Expose ctrl virtqueue logic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [9/27] c4ab1e35f4ca728df82a687763c662369282c513 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 640b8a1c588b56349b3307d88459ea1cd86181fb +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:28 2022 +0200 + + virtio-net: Expose ctrl virtqueue logic + + This allows external vhost-net devices to modify the state of the + VirtIO device model once the vhost-vdpa device has acknowledged the + control commands. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/net/virtio-net.c | 84 ++++++++++++++++++++-------------- + include/hw/virtio/virtio-net.h | 4 ++ + 2 files changed, 53 insertions(+), 35 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 2a127f0a3b..59bedba681 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1433,57 +1433,71 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + return VIRTIO_NET_OK; + } + +-static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) ++size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, ++ const struct iovec *in_sg, unsigned in_num, ++ const struct iovec *out_sg, ++ unsigned out_num) + { + VirtIONet *n = VIRTIO_NET(vdev); + struct virtio_net_ctrl_hdr ctrl; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; +- VirtQueueElement *elem; + size_t s; + struct iovec *iov, *iov2; +- unsigned int iov_cnt; ++ ++ if (iov_size(in_sg, in_num) < sizeof(status) || ++ iov_size(out_sg, out_num) < sizeof(ctrl)) { ++ virtio_error(vdev, "virtio-net ctrl missing headers"); ++ return 0; ++ } ++ ++ iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); ++ s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); ++ iov_discard_front(&iov, &out_num, sizeof(ctrl)); ++ if (s != sizeof(ctrl)) { ++ status = VIRTIO_NET_ERR; ++ } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { ++ status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { ++ status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { ++ status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { ++ status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { ++ status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { ++ status 
= virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); ++ } ++ ++ s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); ++ assert(s == sizeof(status)); ++ ++ g_free(iov2); ++ return sizeof(status); ++} ++ ++static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) ++{ ++ VirtQueueElement *elem; + + for (;;) { ++ size_t written; + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + break; + } +- if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) || +- iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) { +- virtio_error(vdev, "virtio-net ctrl missing headers"); ++ ++ written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, ++ elem->out_sg, elem->out_num); ++ if (written > 0) { ++ virtqueue_push(vq, elem, written); ++ virtio_notify(vdev, vq); ++ g_free(elem); ++ } else { + virtqueue_detach_element(vq, elem, 0); + g_free(elem); + break; + } +- +- iov_cnt = elem->out_num; +- iov2 = iov = g_memdup2(elem->out_sg, +- sizeof(struct iovec) * elem->out_num); +- s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); +- iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); +- if (s != sizeof(ctrl)) { +- status = VIRTIO_NET_ERR; +- } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { +- status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt); +- } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { +- status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt); +- } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { +- status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt); +- } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { +- status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt); +- } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { +- status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt); +- } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { +- status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt); +- } +- +- s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status)); +- assert(s == 
sizeof(status)); +- +- virtqueue_push(vq, elem, sizeof(status)); +- virtio_notify(vdev, vq); +- g_free(iov2); +- g_free(elem); + } + } + +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index cce1c554f7..ef234ffe7e 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -221,6 +221,10 @@ struct VirtIONet { + struct EBPFRSSContext ebpf_rss; + }; + ++size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, ++ const struct iovec *in_sg, unsigned in_num, ++ const struct iovec *out_sg, ++ unsigned out_num); + void virtio_net_set_netclient_name(VirtIONet *n, const char *name, + const char *type); + +-- +2.31.1 + diff --git a/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch b/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch new file mode 100644 index 0000000..9da7ea7 --- /dev/null +++ b/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch @@ -0,0 +1,143 @@ +From 316b73277de233c7a9b6917077c00d7012060944 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:13 -0700 +Subject: [PATCH 09/16] virtio-net: align ctrl_vq index for non-mq guest for + vhost_vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [2/7] 7f764bbb579c7b473ad67fc25b46e698d277e781 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +With MQ enabled vdpa device and non-MQ supporting guest e.g. +booting vdpa with mq=on over OVMF of single vqp, below assert +failure is seen: + +../hw/virtio/vhost-vdpa.c:560: vhost_vdpa_get_vq_index: Assertion `idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs' failed. 
+ +0 0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6 +1 0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6 +2 0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6 +3 0x00007f8ce3fec252 in () at /lib64/libc.so.6 +4 0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=, idx=) at ../hw/virtio/vhost-vdpa.c:563 +5 0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=, idx=) at ../hw/virtio/vhost-vdpa.c:558 +6 0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=) at ../hw/virtio/vhost.c:1557 +7 0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false) + at ../hw/virtio/virtio-pci.c:974 +8 0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019 +9 0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:361 +10 0x0000558f52d4e5e7 in virtio_net_set_status (status=, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289 +11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370 +12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945 +13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=, val=, size=) at ../hw/virtio/virtio-pci.c:1292 +14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=, size=1, shift=, mask=, attrs=...) + at ../softmmu/memory.c:492 +15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=, access_size_max=, access_fn=0x558f52d15cf0 , mr=0x558f568f19d0, attrs=...) 
at ../softmmu/memory.c:554 +16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=, op=, attrs=attrs@entry=...) + at ../softmmu/memory.c:1504 +17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=, l=, mr=0x558f568f19d0) at /home/opc/qemu-upstream/include/qemu/host-utils.h:165 +18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822 +19 0x0000558f52d0b36b in address_space_write (as=, addr=, attrs=..., buf=buf@entry=0x7f8ce6300028, len=) + at ../softmmu/physmem.c:2914 +20 0x0000558f52d0b3da in address_space_rw (as=, addr=, attrs=..., + attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=, is_write=) at ../softmmu/physmem.c:2924 +21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903 +22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49 +23 0x0000558f52f9f04a in qemu_thread_start (args=) at ../util/qemu-thread-posix.c:556 +24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0 +25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6 + +The cause for the assert failure is due to that the vhost_dev index +for the ctrl vq was not aligned with actual one in use by the guest. +Upon multiqueue feature negotiation in virtio_net_set_multiqueue(), +if guest doesn't support multiqueue, the guest vq layout would shrink +to a single queue pair, consisting of 3 vqs in total (rx, tx and ctrl). +This results in ctrl_vq taking a different vhost_dev group index than +the default. We can map vq to the correct vhost_dev group by checking +if MQ is supported by guest and successfully negotiated. 
Since the +MQ feature is only present along with CTRL_VQ, we ensure the index +2 is only meant for the control vq while MQ is not supported by guest. + +Fixes: 22288fe ("virtio-net: vhost control virtqueue support") +Suggested-by: Jason Wang +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-3-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 68b0a6395f36a8f48f56f46d05f30be2067598b0) +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 33 +++++++++++++++++++++++++++++++-- + 1 file changed, 31 insertions(+), 2 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index ffb3475201..f0bb29c741 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qemu/atomic.h" + #include "qemu/iov.h" ++#include "qemu/log.h" + #include "qemu/main-loop.h" + #include "qemu/module.h" + #include "hw/virtio/virtio.h" +@@ -3171,8 +3172,22 @@ static NetClientInfo net_virtio_info = { + static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) + { + VirtIONet *n = VIRTIO_NET(vdev); +- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ NetClientState *nc; + assert(n->vhost_started); ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { ++ /* Must guard against invalid features and bogus queue index ++ * from being set by malicious guest, or penetrated through ++ * buggy migration stream. 
++ */ ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "%s: bogus vq index ignored\n", __func__); ++ return false; ++ } ++ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); ++ } else { ++ nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ } + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); + } + +@@ -3180,8 +3195,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, + bool mask) + { + VirtIONet *n = VIRTIO_NET(vdev); +- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ NetClientState *nc; + assert(n->vhost_started); ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { ++ /* Must guard against invalid features and bogus queue index ++ * from being set by malicious guest, or penetrated through ++ * buggy migration stream. ++ */ ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "%s: bogus vq index ignored\n", __func__); ++ return; ++ } ++ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); ++ } else { ++ nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ } + vhost_net_virtqueue_mask(get_vhost_net(nc->peer), + vdev, idx, mask); + } +-- +2.31.1 + diff --git a/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch b/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch new file mode 100644 index 0000000..3930cc2 --- /dev/null +++ b/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch @@ -0,0 +1,109 @@ +From 521a1953bc11ab6823dcbbee773bcf86e926a9e7 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:18 -0700 +Subject: [PATCH 14/16] virtio-net: don't handle mq request in userspace + handler for vhost-vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [7/7] 9781cab45448ae16a00fbf10cf7995df6b984a0a (jasowang/qemu-kvm-cs) 
+RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +virtio_queue_host_notifier_read() tends to read pending event +left behind on ioeventfd in the vhost_net_stop() path, and +attempts to handle outstanding kicks from userspace vq handler. +However, in the ctrl_vq handler, virtio_net_handle_mq() has a +recursive call into virtio_net_set_status(), which may lead to +segmentation fault as shown in below stack trace: + +0 0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at ../hw/core/qdev.c:376 +1 0x000055f800c68ad8 in virtio_bus_device_iommu_enabled (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331 +2 0x000055f800d70d7f in vhost_memory_unmap (dev=) at ../hw/virtio/vhost.c:318 +3 0x000055f800d70d7f in vhost_memory_unmap (dev=, buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at ../hw/virtio/vhost.c:336 +4 0x000055f800d71867 in vhost_virtqueue_stop (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590, vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241 +5 0x000055f800d7406c in vhost_dev_stop (hdev=hdev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839 +6 0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315 +7 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:423 +8 0x000055f800d4e628 in virtio_net_set_status (status=, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296 +9 0x000055f800d4e628 in virtio_net_set_status (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370 +10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=, iov=, cmd=0 '\000', n=0x55f8044ec590) at ../hw/net/virtio-net.c:1408 +11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590, vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452 +12 0x000055f800d69f37 in 
virtio_queue_host_notifier_read (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331 +13 0x000055f800d69f37 in virtio_queue_host_notifier_read (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575 +14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier (bus=, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312 +15 0x000055f800d73106 in vhost_dev_disable_notifiers (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590) + at ../../../include/hw/virtio/virtio-bus.h:35 +16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316 +17 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:423 +18 0x000055f800d4e628 in virtio_net_set_status (status=, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296 +19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370 +20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590, val=) at ../hw/virtio/virtio.c:1945 +21 0x000055f800d11d9d in vm_state_notify (running=running@entry=false, state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333 +22 0x000055f800d04e7a in do_vm_stop (state=state@entry=RUN_STATE_SHUTDOWN, send_stop=send_stop@entry=false) at ../softmmu/cpus.c:262 +23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280 +24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812 +25 0x000055f800ad5b13 in main (argc=, argv=, envp=) at ../softmmu/main.c:51 + +For now, temporarily disable handling MQ request from the ctrl_vq +userspace handler to avoid the recursive virtio_net_set_status() +call. Some rework is needed to allow changing the number of +queues without going through a full virtio_net_set_status cycle, +particularly for vhost-vdpa backend.
+ +This patch will need to be reverted as soon as future patches of +having the change of #queues handled in userspace is merged. + +Fixes: 402378407db ("vhost-vdpa: multiqueue support") +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-8-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 2a7888cc3aa31faee839fa5dddad354ff8941f4c) +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index f0bb29c741..099e65036d 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1381,6 +1381,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + { + VirtIODevice *vdev = VIRTIO_DEVICE(n); + uint16_t queue_pairs; ++ NetClientState *nc = qemu_get_queue(n->nic); + + virtio_net_disable_rss(n); + if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { +@@ -1412,6 +1413,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + return VIRTIO_NET_ERR; + } + ++ /* Avoid changing the number of queue_pairs for vdpa device in ++ * userspace handler. A future fix is needed to handle the mq ++ * change in userspace handler with vhost-vdpa. Let's disable ++ * the mq handling from userspace for now and only allow get ++ * done through the kernel. Ripples may be seen when falling ++ * back to userspace, but without doing it qemu process would ++ * crash on a recursive entry to virtio_net_set_status(). 
++ */ ++ if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { ++ return VIRTIO_NET_ERR; ++ } ++ + n->curr_queue_pairs = queue_pairs; + /* stop the backend before changing the number of queue_pairs to avoid handling a + * disabled queue */ +-- +2.31.1 + diff --git a/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch b/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch new file mode 100644 index 0000000..f6072d2 --- /dev/null +++ b/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch @@ -0,0 +1,52 @@ +From 9e737aba614e94da4458f02d4ff97e95ffffd19f Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:12 -0700 +Subject: [PATCH 08/16] virtio-net: setup vhost_dev and notifiers for cvq only + when feature is negotiated +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [1/7] a5c5a2862b2e4d15ef7c09da3e4234fdef37cc66 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +When the control virtqueue feature is absent or not negotiated, +vhost_net_start() still tries to set up vhost_dev and install +vhost notifiers for the control virtqueue, which results in +erroneous ioctl calls with incorrect queue index sending down +to driver. Do that only when needed. + +Fixes: 22288fe ("virtio-net: vhost control virtqueue support") +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-2-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit aa8581945a13712ff3eed0ad3ba7a9664fc1604b) +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 1067e72b39..ffb3475201 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -245,7 +245,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) + VirtIODevice *vdev = VIRTIO_DEVICE(n); + NetClientState *nc = qemu_get_queue(n->nic); + int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; +- int cvq = n->max_ncs - n->max_queue_pairs; ++ int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? ++ n->max_ncs - n->max_queue_pairs : 0; + + if (!get_vhost_net(nc->peer)) { + return; +-- +2.31.1 + diff --git a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch new file mode 100644 index 0000000..897e04c --- /dev/null +++ b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch @@ -0,0 +1,77 @@ +From 975af1b9f1811e113e1babd928ae70f8e4ebefb5 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 09:28:19 +0100 +Subject: [PATCH 13/16] virtio-scsi: clean up virtio_scsi_handle_cmd_vq() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [5/6] 27b0225783fa9bbb8fe5ee692bd3f0a888d49d07 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +virtio_scsi_handle_cmd_vq() is only called from hw/scsi/virtio-scsi.c +now and its return value is no longer used. Remove the function +prototype from virtio-scsi.h and drop the return value. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-6-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit ad482b57ef841b2d4883c5079d20ba44ff5e4b3e) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 5 +---- + include/hw/virtio/virtio-scsi.h | 1 - + 2 files changed, 1 insertion(+), 5 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index a47033d91d..df5ff8bab7 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -685,12 +685,11 @@ static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req) + scsi_req_unref(sreq); + } + +-bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) ++static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + { + VirtIOSCSIReq *req, *next; + int ret = 0; + bool suppress_notifications = virtio_queue_get_notification(vq); +- bool progress = false; + + QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); + +@@ -700,7 +699,6 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + } + + while ((req = virtio_scsi_pop_req(s, vq))) { +- progress = true; + ret = virtio_scsi_handle_cmd_req_prepare(s, req); + if (!ret) { + QTAILQ_INSERT_TAIL(&reqs, req, next); +@@ -725,7 +723,6 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { + virtio_scsi_handle_cmd_req_submit(s, req); + } +- return progress; + } + + static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 44dc3b81ec..2497530064 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -151,7 +151,6 @@ void virtio_scsi_common_realize(DeviceState *dev, + Error **errp); + + void virtio_scsi_common_unrealize(DeviceState *dev); +-bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); + void 
virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); + void virtio_scsi_free_req(VirtIOSCSIReq *req); + void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, +-- +2.31.1 + diff --git a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch new file mode 100644 index 0000000..30f012f --- /dev/null +++ b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch @@ -0,0 +1,65 @@ +From c6e16a7a5a18ec2bc4f8a6f5cc1c887e18b16cdf Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 09:28:12 +0100 +Subject: [PATCH 12/16] virtio-scsi: clean up virtio_scsi_handle_ctrl_vq() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [4/6] ca3751b7bfad5163c5b1c81b8525936a848d42ea (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +virtio_scsi_handle_ctrl_vq() is only called from hw/scsi/virtio-scsi.c +now and its return value is no longer used. Remove the function +prototype from virtio-scsi.h and drop the return value. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-5-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 73b3b49f1880f236b4d0ffd7efb00280c05a5fab) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 5 +---- + include/hw/virtio/virtio-scsi.h | 1 - + 2 files changed, 1 insertion(+), 5 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index dd2185b943..a47033d91d 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -460,16 +460,13 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req) + } + } + +-bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) ++static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) + { + VirtIOSCSIReq *req; +- bool progress = false; + + while ((req = virtio_scsi_pop_req(s, vq))) { +- progress = true; + virtio_scsi_handle_ctrl_req(s, req); + } +- return progress; + } + + /* +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 5957597825..44dc3b81ec 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -152,7 +152,6 @@ void virtio_scsi_common_realize(DeviceState *dev, + + void virtio_scsi_common_unrealize(DeviceState *dev); + bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); +-bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq); + void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); + void virtio_scsi_free_req(VirtIOSCSIReq *req); + void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, +-- +2.31.1 + diff --git a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch new file mode 100644 index 0000000..bfdd39b --- /dev/null +++ b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch @@ -0,0 +1,62 @@ +From 019d5a0ca5d13f837a59b9e2815e2fd7ac120807 Mon Sep 17 00:00:00 2001 +From: Stefan 
Hajnoczi +Date: Tue, 17 May 2022 09:28:06 +0100 +Subject: [PATCH 11/16] virtio-scsi: clean up virtio_scsi_handle_event_vq() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [3/6] f8dbc4c1991c61e4cf8dea50942c3cd509c9c4bd (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +virtio_scsi_handle_event_vq() is only called from hw/scsi/virtio-scsi.c +now and its return value is no longer used. Remove the function +prototype from virtio-scsi.h and drop the return value. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-4-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 37ce2de95169dacab3fb53d11bd4509b9c2e3a4c) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 4 +--- + include/hw/virtio/virtio-scsi.h | 1 - + 2 files changed, 1 insertion(+), 4 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 7b69eeed64..dd2185b943 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -856,13 +856,11 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, + virtio_scsi_complete_req(req); + } + +-bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq) ++static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq) + { + if (s->events_dropped) { + virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0); +- return true; + } +- return false; + } + + static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 543681bc18..5957597825 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -151,7 +151,6 @@ void virtio_scsi_common_realize(DeviceState *dev, + Error **errp); + + void virtio_scsi_common_unrealize(DeviceState *dev); +-bool 
virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq); + bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); + bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq); + void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); +-- +2.31.1 + diff --git a/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch b/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch new file mode 100644 index 0000000..5ba11a2 --- /dev/null +++ b/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch @@ -0,0 +1,103 @@ +From 1b609b2af303fb6498b2ef94ac4f2e900dc8c1b2 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 09:27:45 +0100 +Subject: [PATCH 10/16] virtio-scsi: don't waste CPU polling the event + virtqueue + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [2/6] 7e613d9b9fa8ceb668c78cb3ce7ebe1d73a004b5 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +The virtio-scsi event virtqueue is not emptied by its handler function. +This is typical for rx virtqueues where the device uses buffers when +some event occurs (e.g. a packet is received, an error condition +happens, etc). + +Polling non-empty virtqueues wastes CPU cycles. We are not waiting for +new buffers to become available, we are waiting for an event to occur, +so it's a misuse of CPU resources to poll for buffers. + +Introduce the new virtio_queue_aio_attach_host_notifier_no_poll() API, +which is identical to virtio_queue_aio_attach_host_notifier() except +that it does not poll the virtqueue. 
+ +Before this patch the following command-line consumed 100% CPU in the +IOThread polling and calling virtio_scsi_handle_event(): + + $ qemu-system-x86_64 -M accel=kvm -m 1G -cpu host \ + --object iothread,id=iothread0 \ + --device virtio-scsi-pci,iothread=iothread0 \ + --blockdev file,filename=test.img,aio=native,cache.direct=on,node-name=drive0 \ + --device scsi-hd,drive=drive0 + +After this patch CPU is no longer wasted. + +Reported-by: Nir Soffer +Signed-off-by: Stefan Hajnoczi +Tested-by: Nir Soffer +Message-id: 20220427143541.119567-3-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 38738f7dbbda90fbc161757b7f4be35b52205552) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi-dataplane.c | 2 +- + hw/virtio/virtio.c | 13 +++++++++++++ + include/hw/virtio/virtio.h | 1 + + 3 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c +index 29575cbaf6..8bb6e6acfc 100644 +--- a/hw/scsi/virtio-scsi-dataplane.c ++++ b/hw/scsi/virtio-scsi-dataplane.c +@@ -138,7 +138,7 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) + + aio_context_acquire(s->ctx); + virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); +- virtio_queue_aio_attach_host_notifier(vs->event_vq, s->ctx); ++ virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); + + for (i = 0; i < vs->conf.num_queues; i++) { + virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 9d637e043e..67a873f54a 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -3534,6 +3534,19 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) + virtio_queue_host_notifier_aio_poll_end); + } + ++/* ++ * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use ++ * this for rx virtqueues and similar cases where the virtqueue handler ++ * function does not pop all elements. 
When the virtqueue is left non-empty ++ * polling consumes CPU cycles and should not be used. ++ */ ++void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) ++{ ++ aio_set_event_notifier(ctx, &vq->host_notifier, true, ++ virtio_queue_host_notifier_read, ++ NULL, NULL); ++} ++ + void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) + { + aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL); +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index b31c4507f5..b62a35fdca 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -317,6 +317,7 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); + void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled); + void virtio_queue_host_notifier_read(EventNotifier *n); + void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx); ++void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx); + void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx); + VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); + VirtQueue *virtio_vector_next_queue(VirtQueue *vq); +-- +2.31.1 + diff --git a/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch b/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch new file mode 100644 index 0000000..1f22ba0 --- /dev/null +++ b/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch @@ -0,0 +1,119 @@ +From 5aaf33dbbbc89d58a52337985641723b9ee13541 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 27 Apr 2022 15:35:36 +0100 +Subject: [PATCH 09/16] virtio-scsi: fix ctrl and event handler functions in + dataplane mode + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [1/6] 3087889041b960f14a6b3893243f78523a78f637 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav 
Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +Commit f34e8d8b8d48d73f36a67b6d5e492ef9784b5012 ("virtio-scsi: prepare +virtio_scsi_handle_cmd for dataplane") prepared the virtio-scsi cmd +virtqueue handler function to be used in both the dataplane and +non-datpalane code paths. + +It failed to convert the ctrl and event virtqueue handler functions, +which are not designed to be called from the dataplane code path but +will be since the ioeventfd is set up for those virtqueues when +dataplane starts. + +Convert the ctrl and event virtqueue handler functions now so they +operate correctly when called from the dataplane code path. Avoid code +duplication by extracting this code into a helper function. + +Fixes: f34e8d8b8d48d73f36a67b6d5e492ef9784b5012 ("virtio-scsi: prepare virtio_scsi_handle_cmd for dataplane") +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-2-stefanha@redhat.com +[Fixed s/by used/be used/ typo pointed out by Michael Tokarev +. +--Stefan] +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 2f743ef6366c2df4ef51ef3ae318138cdc0125ab) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 42 +++++++++++++++++++++++++++--------------- + 1 file changed, 27 insertions(+), 15 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 7f6da33a8a..7b69eeed64 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -472,16 +472,32 @@ bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) + return progress; + } + ++/* ++ * If dataplane is configured but not yet started, do so now and return true on ++ * success. ++ * ++ * Dataplane is started by the core virtio code but virtqueue handler functions ++ * can also be invoked when a guest kicks before DRIVER_OK, so this helper ++ * function helps us deal with manually starting ioeventfd in that case. 
++ */ ++static bool virtio_scsi_defer_to_dataplane(VirtIOSCSI *s) ++{ ++ if (!s->ctx || s->dataplane_started) { ++ return false; ++ } ++ ++ virtio_device_start_ioeventfd(&s->parent_obj.parent_obj); ++ return !s->dataplane_fenced; ++} ++ + static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) + { + VirtIOSCSI *s = (VirtIOSCSI *)vdev; + +- if (s->ctx) { +- virtio_device_start_ioeventfd(vdev); +- if (!s->dataplane_fenced) { +- return; +- } ++ if (virtio_scsi_defer_to_dataplane(s)) { ++ return; + } ++ + virtio_scsi_acquire(s); + virtio_scsi_handle_ctrl_vq(s, vq); + virtio_scsi_release(s); +@@ -720,12 +736,10 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) + /* use non-QOM casts in the data path */ + VirtIOSCSI *s = (VirtIOSCSI *)vdev; + +- if (s->ctx && !s->dataplane_started) { +- virtio_device_start_ioeventfd(vdev); +- if (!s->dataplane_fenced) { +- return; +- } ++ if (virtio_scsi_defer_to_dataplane(s)) { ++ return; + } ++ + virtio_scsi_acquire(s); + virtio_scsi_handle_cmd_vq(s, vq); + virtio_scsi_release(s); +@@ -855,12 +869,10 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) + { + VirtIOSCSI *s = VIRTIO_SCSI(vdev); + +- if (s->ctx) { +- virtio_device_start_ioeventfd(vdev); +- if (!s->dataplane_fenced) { +- return; +- } ++ if (virtio_scsi_defer_to_dataplane(s)) { ++ return; + } ++ + virtio_scsi_acquire(s); + virtio_scsi_handle_event_vq(s, vq); + virtio_scsi_release(s); +-- +2.31.1 + diff --git a/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch b/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch new file mode 100644 index 0000000..8487f5c --- /dev/null +++ b/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch @@ -0,0 +1,168 @@ +From 6603f216dbc07a1d221b1665409cfec6cc9960e2 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 09:28:26 +0100 +Subject: [PATCH 14/16] virtio-scsi: move request-related items from .h to .c + +RH-Author: Stefan Hajnoczi 
+RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [6/6] ecdf5289abd04062c85c5ed8e577a5249684a3b0 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +There is no longer a need to expose the request and related APIs in +virtio-scsi.h since there are no callers outside virtio-scsi.c. + +Note the block comment in VirtIOSCSIReq has been adjusted to meet the +coding style. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-7-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 3dc584abeef0e1277c2de8c1c1974cb49444eb0a) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 45 ++++++++++++++++++++++++++++++--- + include/hw/virtio/virtio-scsi.h | 40 ----------------------------- + 2 files changed, 41 insertions(+), 44 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index df5ff8bab7..2450c9438c 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -29,6 +29,43 @@ + #include "hw/virtio/virtio-access.h" + #include "trace.h" + ++typedef struct VirtIOSCSIReq { ++ /* ++ * Note: ++ * - fields up to resp_iov are initialized by virtio_scsi_init_req; ++ * - fields starting at vring are zeroed by virtio_scsi_init_req. 
++ */ ++ VirtQueueElement elem; ++ ++ VirtIOSCSI *dev; ++ VirtQueue *vq; ++ QEMUSGList qsgl; ++ QEMUIOVector resp_iov; ++ ++ union { ++ /* Used for two-stage request submission */ ++ QTAILQ_ENTRY(VirtIOSCSIReq) next; ++ ++ /* Used for cancellation of request during TMFs */ ++ int remaining; ++ }; ++ ++ SCSIRequest *sreq; ++ size_t resp_size; ++ enum SCSIXferMode mode; ++ union { ++ VirtIOSCSICmdResp cmd; ++ VirtIOSCSICtrlTMFResp tmf; ++ VirtIOSCSICtrlANResp an; ++ VirtIOSCSIEvent event; ++ } resp; ++ union { ++ VirtIOSCSICmdReq cmd; ++ VirtIOSCSICtrlTMFReq tmf; ++ VirtIOSCSICtrlANReq an; ++ } req; ++} VirtIOSCSIReq; ++ + static inline int virtio_scsi_get_lun(uint8_t *lun) + { + return ((lun[2] << 8) | lun[3]) & 0x3FFF; +@@ -45,7 +82,7 @@ static inline SCSIDevice *virtio_scsi_device_get(VirtIOSCSI *s, uint8_t *lun) + return scsi_device_get(&s->bus, 0, lun[1], virtio_scsi_get_lun(lun)); + } + +-void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) ++static void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) + { + VirtIODevice *vdev = VIRTIO_DEVICE(s); + const size_t zero_skip = +@@ -58,7 +95,7 @@ void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) + memset((uint8_t *)req + zero_skip, 0, sizeof(*req) - zero_skip); + } + +-void virtio_scsi_free_req(VirtIOSCSIReq *req) ++static void virtio_scsi_free_req(VirtIOSCSIReq *req) + { + qemu_iovec_destroy(&req->resp_iov); + qemu_sglist_destroy(&req->qsgl); +@@ -801,8 +838,8 @@ static void virtio_scsi_reset(VirtIODevice *vdev) + s->events_dropped = false; + } + +-void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, +- uint32_t event, uint32_t reason) ++static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, ++ uint32_t event, uint32_t reason) + { + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + VirtIOSCSIReq *req; +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 2497530064..abdda2cbd0 100644 +--- 
a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -94,42 +94,6 @@ struct VirtIOSCSI { + uint32_t host_features; + }; + +-typedef struct VirtIOSCSIReq { +- /* Note: +- * - fields up to resp_iov are initialized by virtio_scsi_init_req; +- * - fields starting at vring are zeroed by virtio_scsi_init_req. +- * */ +- VirtQueueElement elem; +- +- VirtIOSCSI *dev; +- VirtQueue *vq; +- QEMUSGList qsgl; +- QEMUIOVector resp_iov; +- +- union { +- /* Used for two-stage request submission */ +- QTAILQ_ENTRY(VirtIOSCSIReq) next; +- +- /* Used for cancellation of request during TMFs */ +- int remaining; +- }; +- +- SCSIRequest *sreq; +- size_t resp_size; +- enum SCSIXferMode mode; +- union { +- VirtIOSCSICmdResp cmd; +- VirtIOSCSICtrlTMFResp tmf; +- VirtIOSCSICtrlANResp an; +- VirtIOSCSIEvent event; +- } resp; +- union { +- VirtIOSCSICmdReq cmd; +- VirtIOSCSICtrlTMFReq tmf; +- VirtIOSCSICtrlANReq an; +- } req; +-} VirtIOSCSIReq; +- + static inline void virtio_scsi_acquire(VirtIOSCSI *s) + { + if (s->ctx) { +@@ -151,10 +115,6 @@ void virtio_scsi_common_realize(DeviceState *dev, + Error **errp); + + void virtio_scsi_common_unrealize(DeviceState *dev); +-void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); +-void virtio_scsi_free_req(VirtIOSCSIReq *req); +-void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, +- uint32_t event, uint32_t reason); + + void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp); + int virtio_scsi_dataplane_start(VirtIODevice *s); +-- +2.31.1 + diff --git a/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch b/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch new file mode 100644 index 0000000..face8e6 --- /dev/null +++ b/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch @@ -0,0 +1,110 @@ +From 2754dc2c7def01d7dd1bb39f3e86ef444652d397 Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Tue, 25 Jan 2022 13:51:14 -0500 +Subject: [PATCH 1/6] virtiofsd: Drop 
membership of all supplementary groups + (CVE-2022-0358) + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 102: virtiofsd: Drop membership of all supplementary groups (CVE-2022-0358) +RH-Commit: [1/1] 93e56c88277fec8e42559a899d32b80fac4a923f +RH-Bugzilla: 2046198 +RH-Acked-by: Greg Kurz +RH-Acked-by: Sergio Lopez +RH-Acked-by: Laszlo Ersek + +At the start, drop membership of all supplementary groups. This is +not required. + +If we have membership of "root" supplementary group and when we switch +uid/gid using setresuid/setsgid, we still retain membership of existing +supplemntary groups. And that can allow some operations which are not +normally allowed. + +For example, if root in guest creates a dir as follows. + +$ mkdir -m 03777 test_dir + +This sets SGID on dir as well as allows unprivileged users to write into +this dir. + +And now as unprivileged user open file as follows. + +$ su test +$ fd = open("test_dir/priviledge_id", O_RDWR|O_CREAT|O_EXCL, 02755); + +This will create SGID set executable in test_dir/. + +And that's a problem because now an unpriviliged user can execute it, +get egid=0 and get access to resources owned by "root" group. This is +privilege escalation. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2044863 +Fixes: CVE-2022-0358 +Reported-by: JIETAO XIAO +Suggested-by: Miklos Szeredi +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Vivek Goyal +Message-Id: +Signed-off-by: Dr. 
David Alan Gilbert + dgilbert: Fixed missing {}'s style nit +(cherry picked from commit 449e8171f96a6a944d1f3b7d3627ae059eae21ca) +--- + tools/virtiofsd/passthrough_ll.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 64b5b4fbb1..b3d0674f6d 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -54,6 +54,7 @@ + #include + #include + #include ++#include + + #include "qemu/cutils.h" + #include "passthrough_helpers.h" +@@ -1161,6 +1162,30 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + #define OURSYS_setresuid SYS_setresuid + #endif + ++static void drop_supplementary_groups(void) ++{ ++ int ret; ++ ++ ret = getgroups(0, NULL); ++ if (ret == -1) { ++ fuse_log(FUSE_LOG_ERR, "getgroups() failed with error=%d:%s\n", ++ errno, strerror(errno)); ++ exit(1); ++ } ++ ++ if (!ret) { ++ return; ++ } ++ ++ /* Drop all supplementary groups. We should not need it */ ++ ret = setgroups(0, NULL); ++ if (ret == -1) { ++ fuse_log(FUSE_LOG_ERR, "setgroups() failed with error=%d:%s\n", ++ errno, strerror(errno)); ++ exit(1); ++ } ++} ++ + /* + * Change to uid/gid of caller so that file is created with + * ownership of caller. 
+@@ -3926,6 +3951,8 @@ int main(int argc, char *argv[]) + + qemu_init_exec_dir(argv[0]); + ++ drop_supplementary_groups(); ++ + pthread_mutex_init(&lo.mutex, NULL); + lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); + lo.root.fd = -1; +-- +2.27.0 + diff --git a/kvm-vmxcap-Add-5-level-EPT-bit.patch b/kvm-vmxcap-Add-5-level-EPT-bit.patch new file mode 100644 index 0000000..8cdb980 --- /dev/null +++ b/kvm-vmxcap-Add-5-level-EPT-bit.patch @@ -0,0 +1,48 @@ +From f0f87dcea3fe14b20b8599cda9b1151ca2490d0c Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 07/18] vmxcap: Add 5-level EPT bit + +RH-Author: Jon Maloy +RH-MergeRequest: 139: vmxcap: Add 5-level EPT bit +RH-Commit: [1/2] 4c098f551f1ed8e2a5582f466afda35b28d97055 (jmaloy/qemu-kvm) +RH-Bugzilla: 2065207 +RH-Acked-by: Paolo Bonzini + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2065207 +UPSTREAM: Merged + +commit d312378e59658473aa91aa15c67ec6200d92e5ff +Author: Vitaly Kuznetsov +Date: Mon Feb 21 15:53:16 2022 +0100 + + vmxcap: Add 5-level EPT bit + + 5-level EPT is present in Icelake Server CPUs and is supported by QEMU + ('vmx-page-walk-5'). 
+ + Signed-off-by: Vitaly Kuznetsov + Message-Id: <20220221145316.576138-2-vkuznets@redhat.com> + Signed-off-by: Paolo Bonzini + +(cherry picked from commit d312378e59658473aa91aa15c67ec6200d92e5ff) +Signed-off-by: Jon Maloy +--- + scripts/kvm/vmxcap | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap +index 6fe66d5f57..f140040104 100755 +--- a/scripts/kvm/vmxcap ++++ b/scripts/kvm/vmxcap +@@ -249,6 +249,7 @@ controls = [ + bits = { + 0: 'Execute-only EPT translations', + 6: 'Page-walk length 4', ++ 7: 'Page-walk length 5', + 8: 'Paging-structure memory type UC', + 14: 'Paging-structure memory type WB', + 16: '2MB EPT pages', +-- +2.27.0 + diff --git a/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch b/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch new file mode 100644 index 0000000..56ecea7 --- /dev/null +++ b/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch @@ -0,0 +1,68 @@ +From 31530bf621dc28689142ffa83d025ec4a4f110c1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 11 Jan 2022 18:29:31 +0000 +Subject: [PATCH 2/2] x86: Add q35 RHEL 8.6.0 machine type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 99: x86: Add q35 RHEL 8.6.0 machine type +RH-Commit: [1/1] a694724b6fa972e312bb76b5569bc979d6c596ef +RH-Bugzilla: 2031035 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Cornelia Huck + +Add the new 8.6.0 machine type; note that while the -AV +notation has gone in the product naming, just keep the smbios +definitions the same for consistency. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/i386/pc_q35.c | 21 ++++++++++++++++++++- + 1 file changed, 20 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index f6e77bca0e..5559261d9e 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -646,6 +646,24 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + } + ++static void pc_q35_init_rhel860(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel860_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.6.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, ++ pc_q35_machine_rhel860_options); ++ ++ + static void pc_q35_init_rhel850(MachineState *machine) + { + pc_q35_init(machine); +@@ -654,8 +672,9 @@ static void pc_q35_init_rhel850(MachineState *machine) + static void pc_q35_machine_rhel850_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel860_options(m); + m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.5.0"; + compat_props_add(m->compat_props, hw_compat_rhel_8_5, +-- +2.27.0 + diff --git a/kvm-x86.conf b/kvm-x86.conf new file mode 100644 index 0000000..3f7842a --- /dev/null +++ b/kvm-x86.conf @@ -0,0 +1,12 @@ +# Setting modprobe kvm_intel/kvm_amd nested = 1 +# only enables Nested Virtualization until the next reboot or +# module reload. Uncomment the option applicable +# to your system below to enable the feature permanently. +# +# User changes in this file are preserved across upgrades. 
+# +# For Intel +#options kvm_intel nested=1 +# +# For AMD +#options kvm_amd nested=1 diff --git a/kvm.conf b/kvm.conf new file mode 100644 index 0000000..24e60e9 --- /dev/null +++ b/kvm.conf @@ -0,0 +1,3 @@ +# +# User changes in this file are preserved across upgrades. +# diff --git a/modules-load.conf b/modules-load.conf new file mode 100644 index 0000000..45b477d --- /dev/null +++ b/modules-load.conf @@ -0,0 +1,4 @@ +# When using SELinux in libvirt, automatic loading of the kvm.ko kernel +# module might not work when qemu-kvm tries to access /dev/kvm - thus we +# simply always load this module during the boot process already. +kvm diff --git a/qemu-ga.sysconfig b/qemu-ga.sysconfig new file mode 100644 index 0000000..67bad0c --- /dev/null +++ b/qemu-ga.sysconfig @@ -0,0 +1,19 @@ +# This is a systemd environment file, not a shell script. +# It provides settings for "/lib/systemd/system/qemu-guest-agent.service". + +# Comma-separated blacklist of RPCs to disable, or empty list to enable all. +# +# You can get the list of RPC commands using "qemu-ga --blacklist='?'". +# There should be no spaces between commas and commands in the blacklist. +BLACKLIST_RPC=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status + +# Fsfreeze hook script specification. +# +# FSFREEZE_HOOK_PATHNAME=/dev/null : disables the feature. +# +# FSFREEZE_HOOK_PATHNAME=/path/to/executable : enables the feature with the +# specified binary or shell script. +# +# FSFREEZE_HOOK_PATHNAME= : enables the feature with the +# default value (invoke "qemu-ga --help" to interrogate). 
+FSFREEZE_HOOK_PATHNAME=/etc/qemu-ga/fsfreeze-hook diff --git a/qemu-guest-agent.service b/qemu-guest-agent.service new file mode 100644 index 0000000..b33e951 --- /dev/null +++ b/qemu-guest-agent.service @@ -0,0 +1,20 @@ +[Unit] +Description=QEMU Guest Agent +BindsTo=dev-virtio\x2dports-org.qemu.guest_agent.0.device +After=dev-virtio\x2dports-org.qemu.guest_agent.0.device +IgnoreOnIsolate=True + +[Service] +UMask=0077 +EnvironmentFile=/etc/sysconfig/qemu-ga +ExecStart=/usr/bin/qemu-ga \ + --method=virtio-serial \ + --path=/dev/virtio-ports/org.qemu.guest_agent.0 \ + --blacklist=${BLACKLIST_RPC} \ + -F${FSFREEZE_HOOK_PATHNAME} +StandardError=syslog +Restart=always +RestartSec=0 + +[Install] +WantedBy=dev-virtio\x2dports-org.qemu.guest_agent.0.device diff --git a/qemu-kvm.spec b/qemu-kvm.spec new file mode 100644 index 0000000..66f14a2 --- /dev/null +++ b/qemu-kvm.spec @@ -0,0 +1,4174 @@ +%global SLOF_gittagdate 20191022 + +%global SLOF_gittagcommit 899d9883 + +%global have_usbredir 1 +%global have_spice 1 +%global have_opengl 1 +%global have_fdt 1 +%global have_gluster 1 +%global have_kvm_setup 0 +%global have_memlock_limits 0 + + + +# Release candidate version tracking +# global rcver rc4 +%if 0%{?rcver:1} +%global rcrel .%{rcver} +%global rcstr -%{rcver} +%endif + +%ifnarch %{ix86} x86_64 + %global have_usbredir 0 +%endif + +%ifnarch s390x + %global have_librdma 1 +%else + %global have_librdma 0 +%endif + +%ifarch %{ix86} + %global kvm_target i386 +%endif +%ifarch x86_64 + %global kvm_target x86_64 +%else + %global have_spice 0 + %global have_opengl 0 + %global have_gluster 0 +%endif +%ifarch %{power64} + %global kvm_target ppc64 + %global have_kvm_setup 1 + %global have_memlock_limits 1 +%endif +%ifarch s390x + %global kvm_target s390x + %global have_kvm_setup 1 +%endif +%ifarch ppc + %global kvm_target ppc +%endif +%ifarch aarch64 + %global kvm_target aarch64 +%endif + +#Versions of various parts: + +%global requires_all_modules \ +%if %{have_spice} \ 
+Requires: %{name}-ui-spice = %{epoch}:%{version}-%{release} \ +%endif \ +%if %{have_opengl} \ +Requires: %{name}-ui-opengl = %{epoch}:%{version}-%{release} \ +%endif \ +Requires: %{name}-block-curl = %{epoch}:%{version}-%{release} \ +%if %{have_gluster} \ +Requires: %{name}-block-gluster = %{epoch}:%{version}-%{release} \ +%endif \ +%if %{have_usbredir} \ +Requires: %{name}-hw-usbredir = %{epoch}:%{version}-%{release} \ +%endif \ +Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \ +Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ +Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} + +# Macro to properly setup RHEL/RHEV conflict handling +%define rhev_ma_conflicts() \ +Obsoletes: %1-ma <= %{epoch}:%{version}-%{release} \ +Obsoletes: %1-rhev <= %{epoch}:%{version}-%{release} + +Summary: QEMU is a machine emulator and virtualizer +Name: qemu-kvm +Version: 6.2.0 +Release: 12%{?rcrel}%{?dist} +# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped +Epoch: 15 +License: GPLv2 and GPLv2+ and CC-BY +Group: Development/Tools +URL: http://www.qemu.org/ +ExclusiveArch: x86_64 %{power64} aarch64 s390x + + +Source0: http://wiki.qemu.org/download/qemu-6.2.0.tar.xz + +# KSM control scripts +Source4: ksm.service +Source5: ksm.sysconfig +Source6: ksmctl.c +Source7: ksmtuned.service +Source8: ksmtuned +Source9: ksmtuned.conf +Source10: qemu-guest-agent.service +Source11: 99-qemu-guest-agent.rules +Source12: bridge.conf +Source13: qemu-ga.sysconfig +Source21: kvm-setup +Source22: kvm-setup.service +Source23: 85-kvm.preset +Source26: vhost.conf +Source27: kvm.conf +Source28: 95-kvm-memlock.conf +Source30: kvm-s390x.conf +Source31: kvm-x86.conf +Source32: qemu-pr-helper.service +Source33: qemu-pr-helper.socket +Source34: 81-kvm-rhel.rules +Source35: udev-kvm-check.c +Source36: README.tests +Source37: tests_data_acpi_pc_SSDT.dimmpxm +Source38: tests_data_acpi_q35_FACP.slic +Source39: tests_data_acpi_q35_SSDT.dimmpxm +Source40: 
tests_data_acpi_virt_SSDT.memhp + +Patch0001: 0001-redhat-Adding-slirp-to-the-exploded-tree.patch +Patch0005: 0005-Initial-redhat-build.patch +Patch0006: 0006-Enable-disable-devices-for-RHEL.patch +Patch0007: 0007-Machine-type-related-general-changes.patch +Patch0008: 0008-Add-aarch64-machine-types.patch +Patch0009: 0009-Add-ppc64-machine-types.patch +Patch0010: 0010-Add-s390x-machine-types.patch +Patch0011: 0011-Add-x86_64-machine-types.patch +Patch0012: 0012-Enable-make-check.patch +Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0014: 0014-Add-support-statement-to-help-output.patch +Patch0015: 0015-globally-limit-the-maximum-number-of-CPUs.patch +Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0017: 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0018: 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0019: 0019-compat-Update-hw_compat_rhel_8_5.patch +Patch0020: 0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch +Patch0021: 0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch +Patch0022: 0022-Fix-virtio-net-pci-vectors-compat.patch +Patch0023: 0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch +Patch0024: 0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch +Patch0025: 0025-redhat-Add-s390x-machine-type-compatibility-handling.patch +# For bz#2005325 - Fix CPU Model for new IBM Z Hardware - qemu part +Patch26: kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch +# For bz#2031041 - Add rhel-8.6.0 machine types for RHEL 8.6 [ppc64le] +Patch27: kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch28: kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch29: kvm-hw-arm-virt-Register-its-as-a-class-property.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 
[aarch64] +Patch30: kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch31: kvm-hw-arm-virt-Add-8.6-machine-type.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch32: kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch +# For bz#2029582 - [8.6] machine types: 6.2: Fix prefer_sockets +Patch33: kvm-rhel-machine-types-x86-set-prefer_sockets.patch +# For bz#2036580 - CVE-2021-4158 virt:rhel/qemu-kvm: QEMU: NULL pointer dereference in pci_write() in hw/acpi/pcihp.c [rhel-8] +Patch34: kvm-acpi-validate-hotplug-selector-on-access.patch +# For bz#2031035 - Add rhel-8.6.0 machine types for RHEL 8.6 [x86] +Patch35: kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch +# For bz#2046198 - CVE-2022-0358 virt:av/qemu-kvm: QEMU: virtiofsd: potential privilege escalation via CVE-2018-13405 [rhel-8.6] +Patch36: kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch +# For bz#2033279 - [wrb][qemu-kvm 6.2] The hot-unplugged device can not be hot-plugged back +Patch37: kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch +# For bz#2021778 - Qemu core dump when do full backup during system reset +# For bz#2036178 - Qemu core dumped when do block-stream to a snapshot node on non-enough space storage +Patch38: kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch +# For bz#2021778 - Qemu core dump when do full backup during system reset +# For bz#2036178 - Qemu core dumped when do block-stream to a snapshot node on non-enough space storage +Patch39: kvm-iotests-stream-error-on-reset-New-test.patch +# For bz#2037135 - Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD +Patch40: kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch +# For bz#2037135 - Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD +Patch41: kvm-block-rbd-workaround-for-ceph-issue-53784.patch +# For bz#1518984 - [Intel 
8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch42: kvm-numa-Enable-numa-for-SGX-EPC-sections.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch43: kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch44: kvm-doc-Add-the-SGX-numa-description.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch45: kvm-Enable-SGX-RH-Only.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch46: kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch +# For bz#2041480 - [incremental_backup] Inconsistent block status reply in qemu-nbd +Patch47: kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch +# For bz#2041480 - [incremental_backup] Inconsistent block status reply in qemu-nbd +Patch48: kvm-iotests-block-status-cache-New-test.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch49: kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch50: kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch51: kvm-iotests.py-Add-QemuStorageDaemon-class.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch52: kvm-iotests-281-Test-lingering-timers.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch53: kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch54: kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch +# For 
bz#2062613 - Revert IBM-specific Ubuntu-compatibility machine type for 8.6-AV GA [rhel-8.7.0] +Patch55: kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch +# For bz#2062613 - Revert IBM-specific Ubuntu-compatibility machine type for 8.6-AV GA [rhel-8.7.0] +Patch56: kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch +# For bz#2062613 - Revert IBM-specific Ubuntu-compatibility machine type for 8.6-AV GA [rhel-8.7.0] +Patch57: kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch +# For bz#2060843 - [virtual network][vDPA] qemu crash after hot unplug vdpa device [rhel-8.7.0] +Patch58: kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch +# For bz#2062610 - Do operation to disk will hang in the guest of target host after hotplugging and migrating [rhel-8.7.0] +Patch59: kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch +# For bz#2062610 - Do operation to disk will hang in the guest of target host after hotplugging and migrating [rhel-8.7.0] +Patch60: kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch +# For bz#2065207 - Win11 (q35+edk2) guest broke after install wsl2 through 'wsl --install -d Ubuntu-20.04' [rhel-8.7.0] +Patch61: kvm-vmxcap-Add-5-level-EPT-bit.patch +# For bz#2065207 - Win11 (q35+edk2) guest broke after install wsl2 through 'wsl --install -d Ubuntu-20.04' [rhel-8.7.0] +Patch62: kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch63: kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch64: kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch65: kvm-tests-acpi-add-SLIC-table-test.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch66: kvm-tests-acpi-SLIC-update-expected-blobs.patch +# For bz#2062611 - Guest can 
not start with SLIC acpi table [rhel-8.7.0] +Patch67: kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch68: kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch69: kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch70: kvm-tests-acpi-update-expected-blobs.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch71: kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch +# For bz#2068202 - RHEL 9.0 guest with vsock device migration failed from RHEL 9.0 > RHEL 8.6 [rhel-8.7.0] +Patch72: kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch +# For bz#2067118 - qemu crash after execute blockdev-reopen with iothread +Patch73: kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch +# For bz#2067118 - qemu crash after execute blockdev-reopen with iothread +Patch74: kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch +# For bz#2071070 - s390x/css: fix PMCW invalid mask +Patch75: kvm-s390x-css-fix-PMCW-invalid-mask.patch +# For bz#1999236 - CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8] +Patch76: kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch +# For bz#1999236 - CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8] +Patch77: kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch +# For bz#1999236 - CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8] +Patch78: kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch +# For bz#2040738 - CVE-2021-4207 virt:rhel/qemu-kvm: QEMU: QXL: double fetch in qxl_cursor() can lead to heap buffer overflow [rhel-8] +Patch79: 
kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch +# For bz#2063262 - CVE-2022-26354 virt:rhel/qemu-kvm: QEMU: vhost-vsock: missing virtqueue detach on error can lead to memory leak [rhel-8] +Patch80: kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch + +BuildRequires: wget +BuildRequires: rpm-build +BuildRequires: ninja-build +#BuildRequires: meson >= 0.58.2 +BuildRequires: zlib-devel +BuildRequires: glib2-devel +BuildRequires: which +BuildRequires: gnutls-devel +BuildRequires: cyrus-sasl-devel +BuildRequires: libtool +BuildRequires: libaio-devel +BuildRequires: rsync +BuildRequires: python3-devel +BuildRequires: pciutils-devel +BuildRequires: libiscsi-devel +BuildRequires: ncurses-devel +BuildRequires: libattr-devel +BuildRequires: libusbx-devel >= 1.0.23 +%if %{have_usbredir} +BuildRequires: usbredir-devel >= 0.7.1 +%endif +BuildRequires: texinfo +BuildRequires: python3-sphinx +%if %{have_spice} +BuildRequires: spice-protocol >= 0.12.12 +BuildRequires: spice-server-devel >= 0.12.8 +BuildRequires: libcacard-devel +# For smartcard NSS support +BuildRequires: nss-devel +%endif +BuildRequires: libseccomp-devel >= 2.4.0 +# For network block driver +BuildRequires: libcurl-devel +BuildRequires: libssh-devel +BuildRequires: librados-devel +BuildRequires: librbd-devel +%if %{have_gluster} +# For gluster block driver +BuildRequires: glusterfs-api-devel +BuildRequires: glusterfs-devel +%endif +# We need both because the 'stap' binary is probed for by configure +BuildRequires: systemtap +BuildRequires: systemtap-sdt-devel +# For VNC PNG support +BuildRequires: libpng-devel +# For uuid generation +BuildRequires: libuuid-devel +# For Braille device support +BuildRequires: brlapi-devel +# For test suite +BuildRequires: check-devel +# For virtiofs +BuildRequires: libcap-ng-devel +# Hard requirement for version >= 1.3 +BuildRequires: pixman-devel +# Documentation requirement +BuildRequires: perl-podlators +BuildRequires: texinfo +BuildRequires: 
python3-sphinx +# For rdma +%if 0%{?have_librdma} +BuildRequires: rdma-core-devel +%endif +%if %{have_fdt} +BuildRequires: libfdt-devel >= 1.6.0 +%endif +# iasl and cpp for acpi generation (not a hard requirement as we can use +# pre-compiled files, but it's better to use this) +%ifarch %{ix86} x86_64 +BuildRequires: iasl +BuildRequires: cpp +%endif +# For compressed guest memory dumps +BuildRequires: lzo-devel snappy-devel +# For NUMA memory binding +%ifnarch s390x +BuildRequires: numactl-devel +%endif +BuildRequires: libgcrypt-devel +# qemu-pr-helper multipath support (requires libudev too) +BuildRequires: device-mapper-multipath-devel +BuildRequires: systemd-devel +# used by qemu-bridge-helper and qemu-pr-helper +BuildRequires: libcap-ng-devel + +BuildRequires: diffutils +%ifarch x86_64 +BuildRequires: libpmem-devel +Requires: libpmem +%endif + +# qemu-keymap +BuildRequires: pkgconfig(xkbcommon) + +# For s390-pgste flag +%ifarch s390x +BuildRequires: binutils >= 2.27-16 +%endif + +%if %{have_opengl} +BuildRequires: pkgconfig(epoxy) +BuildRequires: pkgconfig(libdrm) +BuildRequires: pkgconfig(gbm) +%endif + +BuildRequires: perl-Test-Harness + +Requires: qemu-kvm-core = %{epoch}:%{version}-%{release} +Requires: qemu-kvm-docs = %{epoch}:%{version}-%{release} +%rhev_ma_conflicts qemu-kvm + +%{requires_all_modules} + +%define qemudocdir %{_docdir}/%{name} + +%description +qemu-kvm is an open source virtualizer that provides hardware +emulation for the KVM hypervisor. qemu-kvm acts as a virtual +machine monitor together with the KVM kernel modules, and emulates the +hardware for a full system such as a PC and its associated peripherals. 
+ + +%package -n qemu-kvm-core +Summary: qemu-kvm core components +Requires: %{name}-common = %{epoch}:%{version}-%{release} +Requires: qemu-img = %{epoch}:%{version}-%{release} +%ifarch %{ix86} x86_64 +Requires: edk2-ovmf +%endif +%ifarch aarch64 +Requires: edk2-aarch64 +%endif + +%ifarch %{power64} +Requires: SLOF >= %{SLOF_gittagdate}-1.git%{SLOF_gittagcommit} +%endif +Requires: libseccomp >= 2.4.0 +# For compressed guest memory dumps +Requires: lzo snappy +%if %{have_kvm_setup} +Requires(post): systemd-units +Requires(preun): systemd-units + %ifarch %{power64} +Requires: powerpc-utils + %endif +%endif +Requires: libusbx >= 1.0.23 +%if %{have_fdt} +Requires: libfdt >= 1.6.0 +%endif + +%rhev_ma_conflicts qemu-kvm + +%description -n qemu-kvm-core +qemu-kvm is an open source virtualizer that provides hardware +emulation for the KVM hypervisor. qemu-kvm acts as a virtual +machine monitor together with the KVM kernel modules, and emulates the +hardware for a full system such as a PC and its associated peripherals. + +%package -n qemu-kvm-docs +Summary: qemu-kvm documentation + +%description -n qemu-kvm-docs +qemu-kvm-docs provides documentation files regarding qemu-kvm. + +%package -n qemu-img +Summary: QEMU command line tool for manipulating disk images +Group: Development/Tools + +%rhev_ma_conflicts qemu-img + +%description -n qemu-img +This package provides a command line tool for manipulating disk images. 
+ +%package -n qemu-kvm-common +Summary: QEMU common files needed by all QEMU targets +Group: Development/Tools +Requires(post): /usr/bin/getent +Requires(post): /usr/sbin/groupadd +Requires(post): /usr/sbin/useradd +Requires(post): systemd-units +Requires(preun): systemd-units +Requires(postun): systemd-units +%ifarch %{ix86} x86_64 +Requires: seabios-bin >= 1.10.2-1 +Requires: sgabios-bin +%endif +%ifnarch aarch64 s390x +Requires: seavgabios-bin >= 1.12.0-3 +Requires: ipxe-roms-qemu >= 20170123-1 +%endif + +%rhev_ma_conflicts qemu-kvm-common + +%description -n qemu-kvm-common +qemu-kvm is an open source virtualizer that provides hardware emulation for +the KVM hypervisor. + +This package provides documentation and auxiliary programs used with qemu-kvm. + + +%package -n qemu-guest-agent +Summary: QEMU guest agent +Requires(post): systemd-units +Requires(preun): systemd-units +Requires(postun): systemd-units + +%description -n qemu-guest-agent +qemu-kvm is an open source virtualizer that provides hardware emulation for +the KVM hypervisor. + +This package provides an agent to run inside guests, which communicates +with the host over a virtio-serial channel named "org.qemu.guest_agent.0" + +This package does not need to be installed on the host OS. + +%package tests +Summary: tests for the qemu-kvm package +Requires: %{name} = %{epoch}:%{version}-%{release} + +%define testsdir %{_libdir}/%{name}/tests-src + +%description tests +The qemu-kvm-tests rpm contains tests that can be used to verify +the functionality of the installed qemu-kvm package + +Install this package if you want access to the avocado_qemu +tests, or qemu-iotests. + +%package block-curl +Summary: QEMU CURL block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-curl +This package provides the additional CURL block driver for QEMU. 
+ +Install this package if you want to access remote disks over +http, https, ftp and other transports provided by the CURL library. + + +%if %{have_gluster} +%package block-gluster +Summary: QEMU Gluster block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description block-gluster +This package provides the additional Gluster block driver for QEMU. + +Install this package if you want to access remote Gluster storage. +%endif + + +%package block-iscsi +Summary: QEMU iSCSI block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-iscsi +This package provides the additional iSCSI block driver for QEMU. + +Install this package if you want to access iSCSI volumes. + + +%package block-rbd +Summary: QEMU Ceph/RBD block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-rbd +This package provides the additional Ceph/RBD block driver for QEMU. + +Install this package if you want to access remote Ceph volumes +using the rbd protocol. + + +%package block-ssh +Summary: QEMU SSH block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-ssh +This package provides the additional SSH block driver for QEMU. + +Install this package if you want to access remote disks using +the Secure Shell (SSH) protocol. + + +%if %{have_spice} +%package ui-spice +Summary: QEMU spice support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%if %{have_opengl} +Requires: %{name}-ui-opengl%{?_isa} = %{epoch}:%{version}-%{release} +%endif + +%description ui-spice +This package provides spice support. +%endif + + +%if %{have_opengl} +%package ui-opengl +Summary: QEMU opengl support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: mesa-libGL +Requires: mesa-libEGL +Requires: mesa-dri-drivers + +%description ui-opengl +This package provides opengl support. 
+%endif + +%if %{have_usbredir} +%package hw-usbredir +Summary: QEMU usbredir support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: usbredir >= 0.7.1 + +%description hw-usbredir +This package provides usbredir support. +%endif + + +%prep +%setup -q -n qemu-%{version}%{?rcstr} +# Remove slirp content in scratchbuilds because it's being applied as a patch +rm -fr slirp +mkdir slirp +%autopatch -p1 + +%global qemu_kvm_build qemu_kvm_build +mkdir -p %{qemu_kvm_build} + +cp -f %{SOURCE37} tests/data/acpi/pc/SSDT.dimmpxm +cp -f %{SOURCE38} tests/data/acpi/q35/FACP.slic +cp -f %{SOURCE39} tests/data/acpi/q35/SSDT.dimmpxm +cp -f %{SOURCE40} tests/data/acpi/virt/SSDT.memhp + +%build +%global buildarch %{kvm_target}-softmmu + +# --build-id option is used for giving info to the debug packages. +buildldflags="VL_LDFLAGS=-Wl,--build-id" + +%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle + +%if 0%{have_gluster} + %global block_drivers_list %{block_drivers_list},gluster +%endif + + +%define disable_everything \\\ + --disable-alsa \\\ + --disable-attr \\\ + --disable-auth-pam \\\ + --disable-avx2 \\\ + --disable-avx512f \\\ + --disable-bochs \\\ + --disable-bpf \\\ + --disable-brlapi \\\ + --disable-bsd-user \\\ + --disable-bzip2 \\\ + --disable-cap-ng \\\ + --disable-capstone \\\ + --disable-cfi \\\ + --disable-cfi-debug \\\ + --disable-cloop \\\ + --disable-cocoa \\\ + --disable-coreaudio \\\ + --disable-coroutine-pool \\\ + --disable-crypto-afalg \\\ + --disable-curl \\\ + --disable-curses \\\ + --disable-debug-info \\\ + --disable-debug-mutex \\\ + --disable-debug-tcg \\\ + --disable-dmg \\\ + --disable-docs \\\ + --disable-dsound \\\ + --disable-fdt \\\ + --disable-fuse \\\ + --disable-fuse-lseek \\\ + --disable-gcrypt \\\ + --disable-gettext \\\ + --disable-gio \\\ + --disable-glusterfs \\\ + --disable-gnutls \\\ + --disable-gtk \\\ + --disable-guest-agent \\\ +
--disable-guest-agent-msi \\\ + --disable-hax \\\ + --disable-hvf \\\ + --disable-iconv \\\ + --disable-jack \\\ + --disable-kvm \\\ + --disable-l2tpv3 \\\ + --disable-libdaxctl \\\ + --disable-libiscsi \\\ + --disable-libnfs \\\ + --disable-libpmem \\\ + --disable-libssh \\\ + --disable-libudev \\\ + --disable-libusb \\\ + --disable-libxml2 \\\ + --disable-linux-aio \\\ + --disable-linux-io-uring \\\ + --disable-linux-user \\\ + --disable-live-block-migration \\\ + --disable-lto \\\ + --disable-lzfse \\\ + --disable-lzo \\\ + --disable-malloc-trim \\\ + --disable-membarrier \\\ + --disable-modules \\\ + --disable-module-upgrades \\\ + --disable-mpath \\\ + --disable-multiprocess \\\ + --disable-netmap \\\ + --disable-nettle \\\ + --disable-numa \\\ + --disable-nvmm \\\ + --disable-opengl \\\ + --disable-oss \\\ + --disable-pa \\\ + --disable-parallels \\\ + --disable-pie \\\ + --disable-pvrdma \\\ + --disable-qcow1 \\\ + --disable-qed \\\ + --disable-qom-cast-debug \\\ + --disable-rbd \\\ + --disable-rdma \\\ + --disable-replication \\\ + --disable-rng-none \\\ + --disable-safe-stack \\\ + --disable-sanitizers \\\ + --disable-sdl \\\ + --disable-sdl-image \\\ + --disable-seccomp \\\ + --disable-selinux \\\ + --disable-slirp-smbd \\\ + --disable-smartcard \\\ + --disable-snappy \\\ + --disable-sparse \\\ + --disable-spice \\\ + --disable-spice-protocol \\\ + --disable-strip \\\ + --disable-system \\\ + --disable-tcg \\\ + --disable-tools \\\ + --disable-tpm \\\ + --disable-u2f \\\ + --disable-usb-redir \\\ + --disable-user \\\ + --disable-vde \\\ + --disable-vdi \\\ + --disable-vhost-crypto \\\ + --disable-vhost-kernel \\\ + --disable-vhost-net \\\ + --disable-vhost-scsi \\\ + --disable-vhost-user \\\ + --disable-vhost-user-blk-server \\\ + --disable-vhost-vdpa \\\ + --disable-vhost-vsock \\\ + --disable-virglrenderer \\\ + --disable-virtfs \\\ + --disable-virtiofsd \\\ + --disable-vnc \\\ + --disable-vnc-jpeg \\\ + --disable-vnc-png \\\ + --disable-vnc-sasl \\\ + 
--disable-vte \\\ + --disable-vvfat \\\ + --disable-werror \\\ + --disable-whpx \\\ + --disable-xen \\\ + --disable-xen-pci-passthrough \\\ + --disable-xfsctl \\\ + --disable-xkbcommon \\\ + --disable-zstd \\\ + --with-git-submodules=ignore + +pushd %{qemu_kvm_build} +../configure \ + --prefix="%{_prefix}" \ + --libdir="%{_libdir}" \ + --datadir="%{_datadir}" \ + --sysconfdir="%{_sysconfdir}" \ + --interp-prefix=%{_prefix}/qemu-%M \ + --localstatedir="%{_localstatedir}" \ + --docdir="%{_docdir}" \ + --libexecdir="%{_libexecdir}" \ + --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ + --extra-cflags="%{optflags}" \ + --with-pkgversion="%{name}-%{version}-%{release}" \ + --with-suffix="%{name}" \ + --firmwarepath=%{_prefix}/share/qemu-firmware \ + --meson="git" \ + --target-list="%{buildarch}" \ + --block-drv-rw-whitelist=%{block_drivers_list} \ + --audio-drv-list= \ + --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ + --with-coroutine=ucontext \ + --with-git=git \ + --tls-priority=@QEMU,SYSTEM \ + %{disable_everything} \ + --enable-attr \ +%ifarch %{ix86} x86_64 + --enable-avx2 \ +%endif + --enable-cap-ng \ + --enable-capstone=internal \ + --enable-coroutine-pool \ + --enable-curl \ + --enable-debug-info \ + --enable-docs \ +%if 0%{have_fdt} + --enable-fdt=system \ +%endif + --enable-gcrypt \ +%if 0%{have_gluster} + --enable-glusterfs \ +%endif + --enable-gnutls \ + --enable-guest-agent \ + --enable-iconv \ + --enable-kvm \ + --enable-libiscsi \ +%ifarch x86_64 + --enable-libpmem \ +%endif + --enable-libssh \ + --enable-libusb \ + --enable-libudev \ + --enable-linux-aio \ + --enable-lzo \ + --enable-malloc-trim \ + --enable-modules \ + --enable-mpath \ +%ifnarch s390x + --enable-numa \ +%endif +%if 0%{have_opengl} + --enable-opengl \ +%endif + --enable-pie \ + --enable-rbd \ +%if 0%{have_librdma} + --enable-rdma \ +%endif + --enable-seccomp \ + --enable-snappy \ +%if 0%{have_spice} + --enable-smartcard \ + --enable-spice \ + --enable-spice-protocol \ 
+%endif + --enable-system \ + --enable-tcg \ + --enable-tools \ + --enable-tpm \ + --enable-trace-backend=dtrace \ +%if 0%{have_usbredir} + --enable-usb-redir \ +%endif + --enable-virtiofsd \ + --enable-vhost-kernel \ + --enable-vhost-net \ + --enable-vhost-user \ + --enable-vhost-user-blk-server \ + --enable-vhost-vdpa \ + --enable-vhost-vsock \ + --enable-vnc \ + --enable-vnc-png \ + --enable-vnc-sasl \ + --enable-werror \ + --enable-xkbcommon \ + --without-default-devices \ + --with-devices-%{kvm_target}=%{kvm_target}-rh-devices + + +echo "qemu-kvm config-host.mak contents:" +echo "===" +cat config-host.mak +echo "===" + +make V=1 %{?_smp_mflags} $buildldflags + +# Setup back compat qemu-kvm binary +%{__python3} scripts/tracetool.py --backend dtrace --format stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace/trace-events-all qemu-kvm.stp + +%{__python3} scripts/tracetool.py --backends=dtrace --format=log-stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace/trace-events-all qemu-kvm-log.stp + +%{__python3} scripts/tracetool.py --backend dtrace --format simpletrace-stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace/trace-events-all qemu-kvm-simpletrace.stp + +cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm + +gcc %{SOURCE6} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl +gcc %{SOURCE35} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o udev-kvm-check + +%ifarch s390x + # Copy the built new images into place for "make check": + cp pc-bios/s390-ccw/s390-ccw.img pc-bios/s390-ccw/s390-netboot.img pc-bios/ +%endif + +popd + +%install +pushd %{qemu_kvm_build} +%define _udevdir %(pkg-config --variable=udevdir udev) +%define _udevrulesdir %{_udevdir}/rules.d + +install -D -p -m 0644 %{SOURCE4} $RPM_BUILD_ROOT%{_unitdir}/ksm.service +install -D -p -m 0644 %{SOURCE5} $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/ksm +install -D -p -m 0755 ksmctl $RPM_BUILD_ROOT%{_libexecdir}/ksmctl + 
+install -D -p -m 0644 %{SOURCE7} $RPM_BUILD_ROOT%{_unitdir}/ksmtuned.service +install -D -p -m 0755 %{SOURCE8} $RPM_BUILD_ROOT%{_sbindir}/ksmtuned +install -D -p -m 0644 %{SOURCE9} $RPM_BUILD_ROOT%{_sysconfdir}/ksmtuned.conf +install -D -p -m 0644 %{SOURCE26} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/vhost.conf +%ifarch s390x + install -D -p -m 0644 %{SOURCE30} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%else +%ifarch %{ix86} x86_64 + install -D -p -m 0644 %{SOURCE31} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%else + install -D -p -m 0644 %{SOURCE27} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%endif +%endif + +mkdir -p $RPM_BUILD_ROOT%{_bindir}/ +mkdir -p $RPM_BUILD_ROOT%{_udevrulesdir}/ +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name} + +# Create new directories and put them all under tests-src +mkdir -p $RPM_BUILD_ROOT%{testsdir}/python +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/avocado +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests +mkdir -p $RPM_BUILD_ROOT%{testsdir}/scripts/qmp + +install -p -m 0755 udev-kvm-check $RPM_BUILD_ROOT%{_udevdir} +install -p -m 0644 %{SOURCE34} $RPM_BUILD_ROOT%{_udevrulesdir} + +install -m 0644 scripts/dump-guest-memory.py \ + $RPM_BUILD_ROOT%{_datadir}/%{name} + +# Install avocado_qemu tests +cp -R tests/avocado/* $RPM_BUILD_ROOT%{testsdir}/tests/avocado/ + +# Install qemu.py and qmp/ scripts required to run avocado_qemu tests +cp -R python/qemu $RPM_BUILD_ROOT%{testsdir}/python +cp -R scripts/qmp/* $RPM_BUILD_ROOT%{testsdir}/scripts/qmp +install -p -m 0644 ../tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ + +# Install qemu-iotests +cp -R ../tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ +cp -ur tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ +# Avoid ambiguous 'python' interpreter name +find $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env 
\(python\|python3\)+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env \(python\|python3\)+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/\(python\|python3\)+%{__python3}+' {} \; + +install -p -m 0644 %{SOURCE36} $RPM_BUILD_ROOT%{testsdir}/README + +make DESTDIR=$RPM_BUILD_ROOT \ + sharedir="%{_datadir}/%{name}" \ + datadir="%{_datadir}/%{name}" \ + install + +mkdir -p $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset + +# Move vhost-user JSON files to the standard "qemu" directory +mkdir -p $RPM_BUILD_ROOT%{_datadir}/qemu +mv $RPM_BUILD_ROOT%{_datadir}/%{name}/vhost-user $RPM_BUILD_ROOT%{_datadir}/qemu/ + +# Install qemu-guest-agent service and udev rules +install -m 0644 %{_sourcedir}/qemu-guest-agent.service %{buildroot}%{_unitdir} +install -m 0644 %{_sourcedir}/qemu-ga.sysconfig %{buildroot}%{_sysconfdir}/sysconfig/qemu-ga +install -m 0644 %{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevrulesdir} + +# - the fsfreeze hook script: +install -D --preserve-timestamps \ + scripts/qemu-guest-agent/fsfreeze-hook \ + $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook +# Workaround for the missing /etc/qemu-kvm/fsfreeze-hook +# Please, do not carry this over to RHEL-9 +mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/qemu-kvm/ +ln -s %{_sysconfdir}/qemu-ga/fsfreeze-hook \ + $RPM_BUILD_ROOT%{_sysconfdir}/qemu-kvm/fsfreeze-hook + +# - the directory for user scripts: +mkdir $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook.d + +# - and the fsfreeze script samples: +mkdir --parents $RPM_BUILD_ROOT%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ +install --preserve-timestamps --mode=0644 \ + scripts/qemu-guest-agent/fsfreeze-hook.d/*.sample \ + $RPM_BUILD_ROOT%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ + +# - Install dedicated log directory: +mkdir -p -v $RPM_BUILD_ROOT%{_localstatedir}/log/qemu-ga/ + +mkdir -p $RPM_BUILD_ROOT%{_bindir} 
+install -c -m 0755 qga/qemu-ga ${RPM_BUILD_ROOT}%{_bindir}/qemu-ga + +mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8 + +install -m 0755 %{kvm_target}-softmmu/qemu-system-%{kvm_target} $RPM_BUILD_ROOT%{_libexecdir}/qemu-kvm +install -m 0644 qemu-kvm.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -m 0644 qemu-kvm-log.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -m 0644 qemu-kvm-simpletrace.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -d -m 0755 "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/script.d" +install -c -m 0644 scripts/systemtap/script.d/qemu_kvm.stp "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/script.d/" +install -d -m 0755 "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/conf.d" +install -c -m 0644 scripts/systemtap/conf.d/qemu_kvm.conf "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/conf.d/" + + +rm $RPM_BUILD_ROOT/%{_datadir}/applications/qemu.desktop +rm $RPM_BUILD_ROOT%{_bindir}/qemu-system-%{kvm_target} +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}.stp +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-simpletrace.stp +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-log.stp +rm $RPM_BUILD_ROOT%{_bindir}/elf2dmp + +# Install simpletrace +install -m 0755 scripts/simpletrace.py $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py +# Avoid ambiguous 'python' interpreter name +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool scripts/tracetool/*.py +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend scripts/tracetool/backend/*.py +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py + +mkdir -p $RPM_BUILD_ROOT%{qemudocdir} +install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} ../README.rst 
../README.systemtap ../COPYING ../COPYING.LIB ../LICENSE ../docs/interop/qmp-spec.txt + +# Rename man page +pushd ${RPM_BUILD_ROOT}%{_mandir}/man1/ +for fn in qemu.1*; do + mv $fn "qemu-kvm${fn#qemu}" +done +popd +chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man1/* +chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man8/* + +install -D -p -m 0644 ../qemu.sasl $RPM_BUILD_ROOT%{_sysconfdir}/sasl2/%{name}.conf + +# Install keymaps +pushd pc-bios/keymaps +for kmp in *; do + install -m 0644 $kmp ${RPM_BUILD_ROOT}%{_datadir}/%{name}/keymaps/ +done +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/keymaps/*.stamp +popd + +# Provided by package openbios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-ppc +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc32 +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc64 +# Provided by package SLOF +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/slof.bin + +# Remove unpackaged files. +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/palcode-clipper +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/petalogix*.dtb +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/bamboo.dtb +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/ppc_rom.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-zipl.rom +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot.e500 +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu_vga.ndrv +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/skiboot.lid +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qboot.rom + +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-ccw.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-netboot.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/hppa-firmware.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/canyonlands.dtb +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot-sam460-20100605.bin + +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/firmware +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-*.fd +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-licenses.txt + +rm -rf 
${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-sifive_u-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-generic-fw_dynamic.* +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-sifive_u-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-generic-fw_dynamic.* +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu-nsis.bmp +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/npcm7xx_bootrom.bin + +rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/ui-spice-app.so + +# Remove virtfs-proxy-helper files +rm -rf ${RPM_BUILD_ROOT}%{_libexecdir}/virtfs-proxy-helper +rm -rf ${RPM_BUILD_ROOT}%{_mandir}/man1/virtfs-proxy-helper* + +%ifarch s390x + # Use the s390-*.imgs that we've just built, not the pre-built ones + install -m 0644 pc-bios/s390-ccw/s390-ccw.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ + install -m 0644 pc-bios/s390-ccw/s390-netboot.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ +%else + rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/hw-s390x-virtio-gpu-ccw.so +%endif + +%ifnarch x86_64 + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/kvmvapic.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/linuxboot.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/multiboot.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/multiboot_dma.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pvh.bin +%endif + +# Remove sparc files +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,tcx.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,cgthree.bin + +# Remove ivshmem example programs +rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-client +rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-server + +# Remove efi roms +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/efi*.rom + +# Provided by package ipxe +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pxe*rom +# Provided by package vgabios +rm -rf 
${RPM_BUILD_ROOT}%{_datadir}/%{name}/vgabios*bin +# Provided by package seabios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/bios*.bin +# Provided by package sgabios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/sgabios.bin + +# The EFI PXE ROM images are relative symlinks into the sibling +# ipxe.efi directory, as QEMU doesn't know how to look +# for other paths, yet. +# pxe_link NAME ID: link efi-NAME.rom to ../ipxe.efi/ID.rom +pxe_link() { + ln -s "../ipxe.efi/$2.rom" "%{buildroot}%{_datadir}/%{name}/efi-$1.rom" +} + +%ifnarch aarch64 s390x +pxe_link e1000 8086100e +pxe_link ne2k_pci 10ec8029 +pxe_link pcnet 10222000 +pxe_link rtl8139 10ec8139 +pxe_link virtio 1af41000 +pxe_link e1000e 808610d3 +%endif + +# rom_link TARGET NAME: create NAME in the package data directory as a symlink to TARGET +rom_link() { + ln -s "$1" "%{buildroot}%{_datadir}/%{name}/$2" +} + +%ifnarch aarch64 s390x + rom_link ../seavgabios/vgabios-isavga.bin vgabios.bin + rom_link ../seavgabios/vgabios-cirrus.bin vgabios-cirrus.bin + rom_link ../seavgabios/vgabios-qxl.bin vgabios-qxl.bin + rom_link ../seavgabios/vgabios-stdvga.bin vgabios-stdvga.bin + rom_link ../seavgabios/vgabios-vmware.bin vgabios-vmware.bin + rom_link ../seavgabios/vgabios-virtio.bin vgabios-virtio.bin + rom_link ../seavgabios/vgabios-ramfb.bin vgabios-ramfb.bin + rom_link ../seavgabios/vgabios-bochs-display.bin vgabios-bochs-display.bin +%endif +%ifarch x86_64 + rom_link ../seabios/bios.bin bios.bin + rom_link ../seabios/bios-256k.bin bios-256k.bin + rom_link ../sgabios/sgabios.bin sgabios.bin +%endif + +%if 0%{have_kvm_setup} + install -D -p -m 755 %{SOURCE21} $RPM_BUILD_ROOT%{_prefix}/lib/systemd/kvm-setup + install -D -p -m 644 %{SOURCE22} $RPM_BUILD_ROOT%{_unitdir}/kvm-setup.service + install -D -p -m 644 %{SOURCE23} $RPM_BUILD_ROOT%{_presetdir}/85-kvm.preset +%endif + +%if 0%{have_memlock_limits} + install -D -p -m 644 %{SOURCE28} $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/95-kvm-memlock.conf +%endif + +# Install rules to use the bridge helper with libvirt's virbr0 +install -D -m 0644 %{SOURCE12} $RPM_BUILD_ROOT%{_sysconfdir}/%{name}/bridge.conf + +# Install qemu-pr-helper 
service +install -m 0644 %{_sourcedir}/qemu-pr-helper.service %{buildroot}%{_unitdir} +install -m 0644 %{_sourcedir}/qemu-pr-helper.socket %{buildroot}%{_unitdir} + +# Remove libtool archives and static libraries; names are passed NUL-delimited +# so xargs cannot split or misparse paths containing whitespace or quotes. +find "$RPM_BUILD_ROOT" \( -name '*.la' -o -name '*.a' \) -print0 | xargs -0 rm -f + +# We need to make the block device modules and other qemu SO files executable +# otherwise RPM won't pick up their dependencies. +chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/*.so + +# Remove buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/system/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/tools/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/user/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/devel/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/.buildinfo + +# Remove spec +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs + +popd + +%check +pushd %{qemu_kvm_build} +echo "Testing qemu-kvm-build" +export DIFF=diff; make check V=1 +popd + +%post -n qemu-kvm-common +%systemd_post ksm.service +%systemd_post ksmtuned.service + +getent group kvm >/dev/null || groupadd -g 36 -r kvm +getent group qemu >/dev/null || groupadd -g 107 -r qemu +getent passwd qemu >/dev/null || \ +useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ + -c "qemu user" qemu + +# load kvm modules now, so we can make sure no reboot is needed. 
+# If there's already a kvm module installed, we don't mess with it +%udev_rules_update +# No -p interpreter is given for this scriptlet, so use the portable +# redirection form rather than the bash-only "&>" shorthand. +sh %{_sysconfdir}/sysconfig/modules/kvm.modules > /dev/null 2>&1 || : + udevadm trigger --subsystem-match=misc --sysname-match=kvm --action=add || : +# Only when the kvm-setup helper is packaged (same 0-prefixed guard as the install and files sections) +%if 0%{have_kvm_setup} + systemctl daemon-reload # Make sure it sees the new presets and unitfile + %systemd_post kvm-setup.service + if systemctl is-enabled kvm-setup.service > /dev/null; then + systemctl start kvm-setup.service + fi +%endif + +%preun -n qemu-kvm-common +%systemd_preun ksm.service +%systemd_preun ksmtuned.service +%if 0%{have_kvm_setup} +%systemd_preun kvm-setup.service +%endif + +%postun -n qemu-kvm-common +%systemd_postun_with_restart ksm.service +%systemd_postun_with_restart ksmtuned.service + +%post -n qemu-guest-agent +%systemd_post qemu-guest-agent.service +%preun -n qemu-guest-agent +%systemd_preun qemu-guest-agent.service +%postun -n qemu-guest-agent +%systemd_postun_with_restart qemu-guest-agent.service + +%files +# Deliberately empty + +%files -n qemu-kvm-docs +%defattr(-,root,root) +%dir %{qemudocdir} +%doc %{qemudocdir}/genindex.html +%doc %{qemudocdir}/search.html +%doc %{qemudocdir}/objects.inv +%doc %{qemudocdir}/searchindex.js +%doc %{qemudocdir}/README.rst +%doc %{qemudocdir}/COPYING +%doc %{qemudocdir}/COPYING.LIB +%doc %{qemudocdir}/LICENSE +%doc %{qemudocdir}/README.systemtap +%doc %{qemudocdir}/qmp-spec.txt +%doc %{qemudocdir}/interop/* +%doc %{qemudocdir}/index.html +%doc %{qemudocdir}/about/* +%doc %{qemudocdir}/system/* +%doc %{qemudocdir}/tools/* +%doc %{qemudocdir}/user/* +%doc %{qemudocdir}/devel/* +%doc %{qemudocdir}/_static/* + +%files -n qemu-kvm-common +%defattr(-,root,root) +%{_mandir}/man7/qemu-qmp-ref.7* +%{_mandir}/man7/qemu-cpu-models.7* +%{_bindir}/qemu-keymap +%{_bindir}/qemu-pr-helper +%{_bindir}/qemu-edid +%{_bindir}/qemu-trace-stap +%{_unitdir}/qemu-pr-helper.service +%{_unitdir}/qemu-pr-helper.socket +%{_mandir}/man7/qemu-ga-ref.7* +%{_mandir}/man8/qemu-pr-helper.8* 
+%{_mandir}/man1/virtiofsd.1* + +%dir %{_datadir}/%{name}/ +%{_datadir}/%{name}/keymaps/ +%{_mandir}/man1/%{name}.1* +%{_mandir}/man1/qemu-trace-stap.1* +%{_mandir}/man7/qemu-block-drivers.7* +%attr(4755, -, -) %{_libexecdir}/qemu-bridge-helper +%config(noreplace) %{_sysconfdir}/sasl2/%{name}.conf +%{_unitdir}/ksm.service +%{_libexecdir}/ksmctl +%config(noreplace) %{_sysconfdir}/sysconfig/ksm +%{_unitdir}/ksmtuned.service +%{_sbindir}/ksmtuned +%{_udevdir}/udev-kvm-check +%{_udevrulesdir}/81-kvm-rhel.rules +%ghost %{_sysconfdir}/kvm +%config(noreplace) %{_sysconfdir}/ksmtuned.conf +%dir %{_sysconfdir}/%{name} +%config(noreplace) %{_sysconfdir}/%{name}/bridge.conf +%config(noreplace) %{_sysconfdir}/modprobe.d/vhost.conf +%config(noreplace) %{_sysconfdir}/modprobe.d/kvm.conf +%{_datadir}/%{name}/simpletrace.py* +%{_datadir}/%{name}/tracetool/*.py* +%{_datadir}/%{name}/tracetool/backend/*.py* +%{_datadir}/%{name}/tracetool/format/*.py* + +%ifarch x86_64 + %{_datadir}/%{name}/bios.bin + %{_datadir}/%{name}/bios-256k.bin + %{_datadir}/%{name}/linuxboot.bin + %{_datadir}/%{name}/multiboot.bin + %{_datadir}/%{name}/multiboot_dma.bin + %{_datadir}/%{name}/kvmvapic.bin + %{_datadir}/%{name}/sgabios.bin + %{_datadir}/%{name}/pvh.bin +%endif +%ifarch s390x + %{_datadir}/%{name}/s390-ccw.img + %{_datadir}/%{name}/s390-netboot.img +%endif +%ifnarch aarch64 s390x + %{_datadir}/%{name}/vgabios.bin + %{_datadir}/%{name}/vgabios-cirrus.bin + %{_datadir}/%{name}/vgabios-qxl.bin + %{_datadir}/%{name}/vgabios-stdvga.bin + %{_datadir}/%{name}/vgabios-vmware.bin + %{_datadir}/%{name}/vgabios-virtio.bin + %{_datadir}/%{name}/vgabios-ramfb.bin + %{_datadir}/%{name}/vgabios-bochs-display.bin + %{_datadir}/%{name}/efi-e1000.rom + %{_datadir}/%{name}/efi-e1000e.rom + %{_datadir}/%{name}/efi-virtio.rom + %{_datadir}/%{name}/efi-pcnet.rom + %{_datadir}/%{name}/efi-rtl8139.rom + %{_datadir}/%{name}/efi-ne2k_pci.rom + %{_libdir}/qemu-kvm/hw-display-virtio-vga.so +%endif + 
%{_libdir}/%{name}/hw-display-virtio-gpu-gl.so +%ifnarch s390x + %{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so +%endif +%ifarch x86_64 %{power64} + %{_libdir}/%{name}/hw-display-virtio-vga-gl.so +%endif + %{_libdir}/%{name}/accel-qtest-%{kvm_target}.so +%ifarch x86_64 + %{_libdir}/%{name}/accel-tcg-%{kvm_target}.so +%endif +%{_libdir}/%{name}/hw-usb-host.so +%{_datadir}/icons/* +%{_datadir}/%{name}/linuxboot_dma.bin +%{_datadir}/%{name}/dump-guest-memory.py* +%{_datadir}/%{name}/trace-events-all +%if 0%{have_kvm_setup} + %{_prefix}/lib/systemd/kvm-setup + %{_unitdir}/kvm-setup.service + %{_presetdir}/85-kvm.preset +%endif +%if 0%{have_memlock_limits} + %{_sysconfdir}/security/limits.d/95-kvm-memlock.conf +%endif +%{_libexecdir}/virtiofsd + +# This is the standard location for vhost-user JSON files defined in the +# vhost-user specification for interoperability with other software. Unlike +# most other paths we use it's "qemu" instead of "qemu-kvm". +%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json + +%files -n qemu-kvm-core +%defattr(-,root,root) +%{_libexecdir}/qemu-kvm +%{_datadir}/systemtap/tapset/qemu-kvm.stp +%{_datadir}/systemtap/tapset/qemu-kvm-log.stp +%{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp +%{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp +%{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf + +%{_libdir}/qemu-kvm/hw-display-virtio-gpu.so +%ifarch s390x + %{_libdir}/qemu-kvm/hw-s390x-virtio-gpu-ccw.so +%else + %{_libdir}/qemu-kvm/hw-display-virtio-gpu-pci.so +%endif + +%files -n qemu-img +%defattr(-,root,root) +%{_bindir}/qemu-img +%{_bindir}/qemu-io +%{_bindir}/qemu-nbd +%{_bindir}/qemu-storage-daemon +%{_mandir}/man1/qemu-img.1* +%{_mandir}/man8/qemu-nbd.8* +%{_mandir}/man1/qemu-storage-daemon.1* +%{_mandir}/man7/qemu-storage-daemon-qmp-ref.7* + +%files -n qemu-guest-agent +%defattr(-,root,root,-) +%doc COPYING README.rst +%{_bindir}/qemu-ga +%{_mandir}/man8/qemu-ga.8* +%{_unitdir}/qemu-guest-agent.service 
+%{_udevrulesdir}/99-qemu-guest-agent.rules +%config(noreplace) %{_sysconfdir}/sysconfig/qemu-ga +%{_sysconfdir}/qemu-ga +%{_sysconfdir}/qemu-kvm/fsfreeze-hook +%{_datadir}/%{name}/qemu-ga +%dir %{_localstatedir}/log/qemu-ga + +%files tests +%{testsdir} + +%files block-curl +%{_libdir}/qemu-kvm/block-curl.so + +# Optional subpackage: only listed when have_gluster is set +%if 0%{have_gluster} +%files block-gluster +%{_libdir}/qemu-kvm/block-gluster.so +%endif + +%files block-iscsi +%{_libdir}/qemu-kvm/block-iscsi.so + +%files block-rbd +%{_libdir}/qemu-kvm/block-rbd.so + +%files block-ssh +%{_libdir}/qemu-kvm/block-ssh.so + +%if 0%{have_spice} +%files ui-spice + %{_libdir}/qemu-kvm/hw-usb-smartcard.so + %{_libdir}/qemu-kvm/audio-spice.so + %{_libdir}/qemu-kvm/ui-spice-core.so + %{_libdir}/qemu-kvm/chardev-spice.so +%ifarch x86_64 + %{_libdir}/qemu-kvm/hw-display-qxl.so +%endif +%endif + +%if 0%{have_opengl} +%files ui-opengl + %{_libdir}/qemu-kvm/ui-egl-headless.so + %{_libdir}/qemu-kvm/ui-opengl.so +%endif + +# Optional subpackage: only listed when have_usbredir is set +%if 0%{have_usbredir} +%files hw-usbredir + %{_libdir}/qemu-kvm/hw-usb-redirect.so +%endif + + +%changelog +* Thu Apr 21 2022 Jon Maloy - 6.2.0-12 +- kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch [bz#2040738] +- kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch [bz#2063262] +- Resolves: bz#2040738 + (CVE-2021-4207 virt:rhel/qemu-kvm: QEMU: QXL: double fetch in qxl_cursor() can lead to heap buffer overflow [rhel-8]) +- Resolves: bz#2063262 + (CVE-2022-26354 virt:rhel/qemu-kvm: QEMU: vhost-vsock: missing virtqueue detach on error can lead to memory leak [rhel-8]) + +* Thu Apr 21 2022 Jon Maloy - 6.2.0-11 +- kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch [bz#1999236] +- kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch [bz#1999236] +- kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch [bz#1999236] +- Resolves: bz#1999236 + (CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8]) + +* Thu Apr 21 2022 Jon 
Maloy - 6.2.0-10 +- kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch [bz#2068202] +- kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch [bz#2067118] +- kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch [bz#2067118] +- kvm-s390x-css-fix-PMCW-invalid-mask.patch [bz#2071070] +- kvm-Set-permission-on-installing-files.patch [bz#2072377] +- Resolves: bz#2068202 + (RHEL 9.0 guest with vsock device migration failed from RHEL 9.0 > RHEL 8.6 [rhel-8.7.0]) +- Resolves: bz#2067118 + (qemu crash after execute blockdev-reopen with iothread) +- Resolves: bz#2071070 + (s390x/css: fix PMCW invalid mask) +- Resolves: bz#2072377 + (Fix build warnings that occur when installing the keymap files) + +* Wed Apr 06 2022 Jon Maloy - 6.2.0-9 +- kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch [bz#2062613] +- kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch [bz#2062613] +- kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch [bz#2062613] +- kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch [bz#2060843] +- kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch [bz#2062610] +- kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch [bz#2062610] +- kvm-vmxcap-Add-5-level-EPT-bit.patch [bz#2065207] +- kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch [bz#2065207] +- kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch [bz#2062611] +- kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch [bz#2062611] +- kvm-tests-acpi-add-SLIC-table-test.patch [bz#2062611] +- kvm-tests-acpi-SLIC-update-expected-blobs.patch [bz#2062611] +- kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch [bz#2062611] +- kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch [bz#2062611] +- kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch [bz#2062611] +- kvm-tests-acpi-update-expected-blobs.patch [bz#2062611] +- kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch [bz#2062611] +- 
kvm-rhel-workaround-for-lack-of-binary-patches-in-SRPM.patch [bz#2062611] +- Resolves: bz#2062613 + (Revert IBM-specific Ubuntu-compatibility machine type for 8.6-AV GA [rhel-8.7.0]) +- Resolves: bz#2060843 + ([virtual network][vDPA] qemu crash after hot unplug vdpa device [rhel-8.7.0]) +- Resolves: bz#2062610 + (Do operation to disk will hang in the guest of target host after hotplugging and migrating [rhel-8.7.0]) +- Resolves: bz#2065207 + (Win11 (q35+edk2) guest broke after install wsl2 through 'wsl --install -d Ubuntu-20.04' [rhel-8.7.0]) +- Resolves: bz#2062611 + (Guest can not start with SLIC acpi table [rhel-8.7.0]) + +* Tue Feb 22 2022 Jon Maloy - 6.2.0-8 +- kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch [bz#2035185] +- kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch [bz#2035185] +- kvm-iotests.py-Add-QemuStorageDaemon-class.patch [bz#2035185] +- kvm-iotests-281-Test-lingering-timers.patch [bz#2035185] +- kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch [bz#2035185] +- kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch [bz#2035185] +- Resolves: bz#2035185 + (Qemu core dump when start guest with nbd node or do block jobs to nbd node) + +* Tue Feb 15 2022 Jon Maloy - 6.2.0-7 +- kvm-numa-Enable-numa-for-SGX-EPC-sections.patch [bz#1518984] +- kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch [bz#1518984] +- kvm-doc-Add-the-SGX-numa-description.patch [bz#1518984] +- kvm-Enable-SGX-RH-Only.patch [bz#1518984] +- kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch [bz#1518984] +- kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch [bz#2041480] +- kvm-iotests-block-status-cache-New-test.patch [bz#2041480] +- Resolves: bz#1518984 + ([Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support) +- Resolves: bz#2041480 + ([incremental_backup] Inconsistent block status reply in qemu-nbd) + +* Tue Feb 08 2022 Jon Maloy - 6.2.0-6 +- kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch 
[bz#2046198] +- kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch [bz#2033279] +- kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch [bz#2021778 bz#2036178] +- kvm-iotests-stream-error-on-reset-New-test.patch [bz#2021778 bz#2036178] +- kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch [bz#2037135] +- kvm-block-rbd-workaround-for-ceph-issue-53784.patch [bz#2037135] +- Resolves: bz#2046198 + (CVE-2022-0358 virt:av/qemu-kvm: QEMU: virtiofsd: potential privilege escalation via CVE-2018-13405 [rhel-8.6]) +- Resolves: bz#2033279 + ([wrb][qemu-kvm 6.2] The hot-unplugged device can not be hot-plugged back) +- Resolves: bz#2021778 + (Qemu core dump when do full backup during system reset) +- Resolves: bz#2036178 + (Qemu core dumped when do block-stream to a snapshot node on non-enough space storage) +- Resolves: bz#2037135 + (Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD) + +* Tue Jan 25 2022 Jon Maloy - 6.2.0-5 +- kvm-acpi-validate-hotplug-selector-on-access.patch [bz#2036580] +- kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch [bz#2031035] +- Resolves: bz#2036580 + (CVE-2021-4158 virt:rhel/qemu-kvm: QEMU: NULL pointer dereference in pci_write() in hw/acpi/pcihp.c [rhel-8]) +- Resolves: bz#2031035 + (Add rhel-8.6.0 machine types for RHEL 8.6 [x86]) + +* Mon Jan 17 2022 Jon Maloy - 6.2.0-4 +- kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch [bz#2031039] +- kvm-hw-arm-virt-Register-its-as-a-class-property.patch [bz#2031039] +- kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch [bz#2031039] +- kvm-hw-arm-virt-Add-8.6-machine-type.patch [bz#2031039] +- kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch [bz#2031039] +- kvm-rhel-machine-types-x86-set-prefer_sockets.patch [bz#2029582] +- Resolves: bz#2031039 + (Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64]) +- Resolves: bz#2029582 + ([8.6] machine types: 6.2: Fix prefer_sockets) + +* Mon Jan 03 2022 Jon Maloy - 6.2.0-2 +- 
kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch [bz#2005325] +- kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch [bz#2031041] +- Resolves: bz#2005325 + (Fix CPU Model for new IBM Z Hardware - qemu part) +- Resolves: bz#2031041 + (Add rhel-8.6.0 machine types for RHEL 8.6 [ppc64le]) + +* Thu Dec 16 2021 Jon Maloy - 6.2.0-1.el8 +- Rebase to qemu-kvm 6.2.0 +- Resolves: bz#2027716 + +* Mon Nov 22 2021 Jon Maloy - 6.1.0-5 +- kvm-e1000-fix-tx-re-entrancy-problem.patch [bz#1930092] +- kvm-hw-scsi-scsi-disk-MODE_PAGE_ALLS-not-allowed-in-MODE.patch [bz#2020720] +- Resolves: bz#1930092 + (CVE-2021-20257 virt:rhel/qemu-kvm: QEMU: net: e1000: infinite loop while processing transmit descriptors [rhel-8.5.0]) +- Resolves: bz#2020720 + (CVE-2021-3930 virt:rhel/qemu-kvm: QEMU: off-by-one error in mode_sense_page() in hw/scsi/scsi-disk.c [rhel-8]) + +* Thu Oct 21 2021 Jon Maloy - 6.1.0-4 +- kvm-spec-Remove-qemu-kiwi-build.patch [bz#2002694] +- kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch [bz#1998947] +- Resolves: bz#2002694 + (remove qemu-kiwi rpm from qemu-kvm sources in rhel-8.6) +- Resolves: bz#1998947 + (Add machine type compatibility update for 6.1 rebase [aarch64]) + +* Tue Oct 12 2021 Jon Maloy - 6.1.0-3 +- kvm-virtio-net-fix-use-after-unmap-free-for-sg.patch [bz#1999221] +- Resolves: bz#1999221 + (CVE-2021-3748 virt:rhel/qemu-kvm: QEMU: virtio-net: heap use-after-free in virtio_net_receive_rcu [rhel-8]) + +* Fri Oct 01 2021 Jon Maloy - 6.1.0-2 +- kvm-qxl-fix-pre-save-logic.patch [bz#2002907] +- kvm-redhat-Define-hw_compat_rhel_8_5.patch [bz#1998949] +- kvm-redhat-Update-pseries-rhel8.5.0.patch [bz#1998949] +- kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch [bz#1998950] +- Resolves: bz#2002907 + (Unexpectedly failed when managedsave the guest which has qxl video device) +- Resolves: bz#1998949 + (Add machine type compatibility update for 6.1 rebase [ppc64le]) +- Resolves: bz#1998950 + (Add machine type compatibility update for 6.1 
rebase [s390x]) + +* Wed Aug 25 2021 Danilo Cesar Lemes de Paula - 6.0.0-29.el8 +- kvm-file-posix-Cap-max_iov-at-IOV_MAX.patch [bz#1994494] +- kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch [bz#1974366] +- Resolves: bz#1994494 + (VM remains in paused state when trying to write on a resized disk resides on iscsi) +- Resolves: bz#1974366 + (Fail to set migrate incoming for 2nd time after the first time failed) + +* Wed Aug 18 2021 Danilo Cesar Lemes de Paula - 6.0.0-28.el8 +- kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch [bz#1946084] +- kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch [bz#1946084] +- kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch [bz#1946084] +- kvm-audio-Never-send-migration-section.patch [bz#1991671] +- Resolves: bz#1946084 + (qemu-img convert --bitmaps fail if a bitmap is inconsistent) +- Resolves: bz#1991671 + (vmstate differs between -audiodev and QEMU_AUDIO_DRV when no sound frontends devs present.) + +* Wed Aug 04 2021 Miroslav Rezanina - 6.0.0-27 +- kvm-migration-move-wait-unplug-loop-to-its-own-function.patch [bz#1976852] +- kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch [bz#1976852] +- kvm-aarch64-Add-USB-storage-devices.patch [bz#1974579] +- Resolves: bz#1976852 + ([failover vf migration] The failover vf will be unregistered if canceling the migration whose status is "wait-unplug") +- Resolves: bz#1974579 + (It's not possible to start installation from a virtual USB device on aarch64) + +* Thu Jul 29 2021 Miroslav Rezanina - 6.0.0-26 +- kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch [bz#1977798] +- kvm-migration-failover-reset-partially_hotplugged.patch [bz#1787194] +- kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch [bz#1959676] +- kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch [bz#1959729] +- kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch [bz#1924822] +- kvm-ratelimit-protect-with-a-mutex.patch [bz#1838221] +- 
kvm-Update-Linux-headers-to-5.13-rc4.patch [bz#1838221] +- kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch [bz#1838221] +- kvm-iothread-generalize-iothread_set_param-iothread_get_.patch [bz#1930286] +- kvm-iothread-add-aio-max-batch-parameter.patch [bz#1930286] +- kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch [bz#1930286] +- kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch [bz#1848881] +- Resolves: bz#1977798 + (RHEL8.5 guest network interface name changed after upgrade to qemu-6.0) +- Resolves: bz#1787194 + (After canceling the migration of a vm with VF which enables failover, using "migrate -d tcp:invalid uri" to re-migrating the vm will cause the VF in vm to be hot-unplug.) +- Resolves: bz#1959676 + (guest status is paused after loadvm on rhel8.5.0) +- Resolves: bz#1959729 + (SAP/3TB VM migration slowness [idle db]) +- Resolves: bz#1924822 + ([Intel 8.5 FEAT] qemu-kvm AVX2 VNNI - Fast Train) +- Resolves: bz#1838221 + ([Intel 8.5 FEAT] qemu-kvm Bus Lock VM Exit - Fast Train) +- Resolves: bz#1930286 + (randread and randrw regression with virtio-blk multi-queue) +- Resolves: bz#1848881 + (nvme:// block driver can exhaust IOMMU DMAs, hanging the VM, possible data loss) + +* Tue Jul 20 2021 Danilo Cesar Lemes de Paula - 6.0.0-25.el8 +- kvm-s390x-cpumodel-add-3931-and-3932.patch [bz#1976171] +- kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch [bz#1943653] +- kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch [bz#1943653] +- kvm-osdep-provide-ROUND_DOWN-macro.patch [bz#1943653] +- kvm-block-backend-align-max_transfer-to-request-alignmen.patch [bz#1943653] +- kvm-block-add-max_hw_transfer-to-BlockLimits.patch [bz#1943653] +- kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch [bz#1943653] +- Resolves: bz#1976171 + ([IBM 8.5 FEAT] CPU Model for new IBM Z Hardware - qemu part) +- Resolves: bz#1943653 + (RHV VM pauses due to 'qemu-kvm' getting EINVAL on i/o to a direct lun with scsi passthrough enabled) 
+ +* Fri Jul 16 2021 Danilo Cesar Lemes de Paula - 6.0.0-24.el8 +- kvm-s390x-css-Introduce-an-ESW-struct.patch [bz#1968326] +- kvm-s390x-css-Split-out-the-IRB-sense-data.patch [bz#1968326] +- kvm-s390x-css-Refactor-IRB-construction.patch [bz#1968326] +- kvm-s390x-css-Add-passthrough-IRB.patch [bz#1968326] +- kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-Fix-backends-without-multiqueue-support.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- Resolves: bz#1968326 + ([vfio_ccw] I/O error when checking format - dasdfmt requires --force in quick mode when passed through) +- Resolves: bz#1935014 + (qemu crash when attach vhost-user-blk-pci with option queue-size=4096) +- Resolves: bz#1935019 + (qemu guest failed boot when attach vhost-user-blk-pci with option iommu_platform=on) +- Resolves: bz#1935020 + (qemu guest failed boot when attach vhost-user-blk-pci with option packed=on) +- Resolves: bz#1935031 + (qemu guest failed boot when attach vhost-user-blk-pci with unmatched num-queues with qsd) + +* Thu Jul 08 2021 Danilo Cesar Lemes de Paula - 6.0.0-23.el8 +- kvm-Add-mtod_check.patch [bz#1970823 bz#1970842 bz#1970850 bz#1970858] +- kvm-bootp-limit-vendor-specific-area-to-input-packet-mem.patch [bz#1970823 bz#1970842 bz#1970850 
bz#1970858] +- kvm-bootp-check-bootp_input-buffer-size.patch [bz#1970823] +- kvm-upd6-check-udp6_input-buffer-size.patch [bz#1970842] +- kvm-tftp-check-tftp_input-buffer-size.patch [bz#1970850] +- kvm-tftp-introduce-a-header-structure.patch [bz#1970823 bz#1970842 bz#1970850 bz#1970858] +- kvm-udp-check-upd_input-buffer-size.patch [bz#1970858] +- kvm-Fix-DHCP-broken-in-libslirp-v4.6.0.patch [bz#1970823 bz#1970842 bz#1970850 bz#1970858] +- kvm-redhat-use-the-standard-vhost-user-JSON-path.patch [bz#1804196] +- Resolves: bz#1970823 + (CVE-2021-3592 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (bootp) [rhel-av-8]) +- Resolves: bz#1970842 + (CVE-2021-3593 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp6) [rhel-av-8]) +- Resolves: bz#1970850 + (CVE-2021-3595 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (tftp) [rhel-av-8]) +- Resolves: bz#1970858 + (CVE-2021-3594 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp) [rhel-av-8]) +- Resolves: bz#1804196 + (inconsistent paths for interop json files) + +* Fri Jul 02 2021 Danilo Cesar Lemes de Paula - 6.0.0-22.el8 +- kvm-redhat-Expose-upstream-machines-pc-4.2-and-pc-2.11.patch [bz#1897923] +- kvm-redhat-Enable-FDC-device-for-upstream-machines-too.patch [bz#1897923] +- kvm-redhat-Add-hw_compat_4_2_extra-and-apply-to-upstream.patch [bz#1897923] +- kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch [bz#1789757] +- kvm-virtio-gpu-handle-partial-maps-properly.patch [bz#1932279] +- kvm-redhat-Fix-unversioned-Obsoletes-warning.patch [bz#1950405 bz#1967330] +- kvm-redhat-Move-qemu-kvm-docs-dependency-to-qemu-kvm.patch [bz#1950405 bz#1967330] +- kvm-redhat-introducting-qemu-kvm-hw-usbredir.patch [bz#1950405 bz#1967330] +- kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch [bz#1976015] +- Resolves: bz#1897923 + 
(support Live Migration from Ubuntu 18.04 i440fx to RHEL) +- Resolves: bz#1789757 + ([IBM 8.5 FEAT] Add machine option to enable secure VM support) +- Resolves: bz#1932279 + ([aarch64] qemu core dumped when using smmuv3 and iommu_platform enabling at virtio-gpu-pci) +- Resolves: bz#1950405 + (review qemu-kvm-core dependencies) +- Resolves: bz#1967330 + (Make qemu-kvm use versioned obsoletes for qemu-kvm-ma and qemu-kvm-rhev) +- Resolves: bz#1976015 + (spapr: Fix EEH capability issue on KVM guest for PCI passthru) + +* Wed Jun 23 2021 Danilo Cesar Lemes de Paula - 6.0.0-21.el8 +- kvm-block-backend-add-drained_poll.patch [bz#1960137] +- kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch [bz#1960137] +- kvm-disable-CONFIG_USB_STORAGE_BOT.patch [bz#1866133] +- kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch [bz#1954750] +- kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch [bz#1954750] +- kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch [bz#1954750] +- Resolves: bz#1960137 + ([incremental backup] qemu-kvm hangs when Rebooting the VM during full backup) +- Resolves: bz#1866133 + (Disable usb-bot device in QEMU (unsupported)) +- Resolves: bz#1954750 + (firmware scheme for sev-es) + +* Mon Jun 21 2021 Danilo Cesar Lemes de Paula - 6.0.0-20.el8 +- kvm-x86-Add-x86-rhel8.5-machine-types.patch [bz#1957838] +- kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch [bz#1967603] +- kvm-yank-Unregister-function-when-using-TLS-migration.patch [bz#1964326] +- Resolves: bz#1957838 + (8.5 machine types for x86) +- Resolves: bz#1967603 + (Enable interrupt based asynchronous page fault mechanism by default) +- Resolves: bz#1964326 + (Qemu core dump when do tls migration via tcp protocol) + +* Fri Jun 11 2021 Danilo Cesar Lemes de Paula - 6.0.0-19.el8 +- kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch [bz#1965626] +- kvm-redhat-Install-the-s390-netboot.img-that-we-ve-built.patch [bz#1966463] +- 
kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch [bz#1967177] +- kvm-target-i386-sev-add-support-to-query-the-attestation.patch [bz#1957022] +- kvm-spapr-Don-t-hijack-current_machine-boot_order.patch [bz#1960119] +- kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch [bz#1942914] +- kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch [bz#1940731] +- kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch [bz#1940731] +- Resolves: bz#1965626 + (RHEL8.2 - QEMU BIOS fails to read stage2 loader (kvm)) +- Resolves: bz#1966463 + (Rebuild the s390-netboot.img for downstream instead of shipping the upstream image) +- Resolves: bz#1967177 + (QEMU 6.0.0 socket_get_fd() fails with the error "socket_get_fd: too many connections") +- Resolves: bz#1957022 + (SEV: Add support to query the attestation report) +- Resolves: bz#1960119 + ([regression]Failed to reset guest) +- Resolves: bz#1942914 + ([Hyper-V][RHEL8.4]Nested Hyper-V on KVM: On Intel CPU L1 2016 can not start with cpu model Skylake-Server-noTSX-IBRS or Skylake-Client-noTSX-IBRS) +- Resolves: bz#1940731 + ([ppc64le] Hotplug vcpu device hit call trace:[qemu output] KVM: unknown exit, hardware reason 7fff9ce87ed8) + +* Tue Jun 01 2021 Danilo Cesar Lemes de Paula - 6.0.0-18.el8 +- kvm-virtio-net-failover-add-missing-remove_migration_sta.patch [bz#1953045] +- kvm-hw-arm-virt-Add-8.5-machine-type.patch [bz#1957667] +- kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch [bz#1957667] +- kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch [bz#1957667] +- kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch [bz#1927108] +- kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch [bz#1927108] +- kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch [bz#1927108] +- kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch [bz#1927108] +- kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch [bz#1929720] +- Resolves: bz#1953045 + 
(qemu-kvm NULL pointer de-reference during migration at migrate_fd_connect ->...-> notifier_list_notify) +- Resolves: bz#1957667 + ([aarch64] Add 8.5 machine type) +- Resolves: bz#1927108 + (It's too slow to load scsi disk when use 384 vcpus) +- Resolves: bz#1929720 + ([aarch64] Handle vsmmuv3 IOTLB invalidation with non power of 2 size) + +* Tue May 25 2021 Danilo Cesar Lemes de Paula - 6.0.0-17.el8 +- kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch [bz#1951476] +- kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch [bz#1957834] +- kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch [bz#1957834] +- Resolves: bz#1951476 + ([s390x] RHEL AV 8.5 new machine type for s390x) +- Resolves: bz#1957834 + ([ppc64le] RHEL AV 8.5 new machine type for ppc64le) + +* Mon May 03 2021 Danilo Cesar Lemes de Paula - 6.0.0-16.el8 +- Rebase to qemu-kvm 6.0.0 + +* Wed Apr 28 2021 Danilo Cesar Lemes de Paula - 5.2.0-16.el8 +- kvm-virtio-pci-compat-page-aligned-ATS.patch [bz#1942362] +- Resolves: bz#1942362 + (Live migration with iommu from rhel8.3.1 to rhel8.4 fails: qemu-kvm: get_pci_config_device: Bad config data) + +* Mon Apr 12 2021 Danilo Cesar Lemes de Paula - 5.2.0-15.el8_4 +- kvm-block-Simplify-qmp_block_resize-error-paths.patch [bz#1903511] +- kvm-block-Fix-locking-in-qmp_block_resize.patch [bz#1903511] +- kvm-block-Fix-deadlock-in-bdrv_co_yield_to_drain.patch [bz#1903511] +- Resolves: bz#1903511 + (no response on QMP command 'block_resize') + +* Sat Mar 20 2021 Danilo Cesar Lemes de Paula - 5.2.0-14.el8 +- kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch [bz#1937004] +- kvm-block-export-fix-blk_size-double-byteswap.patch [bz#1937004] +- kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch [bz#1937004] +- kvm-block-export-fix-vhost-user-blk-export-sector-number.patch [bz#1937004] +- kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch [bz#1937004] +- kvm-block-export-port-virtio-blk-read-write-range-check.patch [bz#1937004] +- 
kvm-spec-ui-spice-sub-package.patch [bz#1936373] +- kvm-spec-ui-opengl-sub-package.patch [bz#1936373] +- Resolves: bz#1937004 + (vhost-user-blk server endianness and input validation fixes) +- Resolves: bz#1936373 + (move spice & opengl modules to rpm subpackages) + +* Tue Mar 16 2021 Danilo Cesar Lemes de Paula - 5.2.0-13.el8 +- kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch [bz#1934158] +- Resolves: bz#1934158 + (Windows guest looses network connectivity when NIC was configured with static IP) + +* Mon Mar 15 2021 Danilo Cesar Lemes de Paula - 5.2.0-12.el8 +- kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch [bz#1927530] +- kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch [bz#1927530] +- kvm-scsi-introduce-scsi_sense_from_errno.patch [bz#1927530] +- kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch [bz#1927530] +- kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch [bz#1927530] +- kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch [bz#1936948] +- Resolves: bz#1927530 + (RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning) +- Resolves: bz#1936948 + (CVE-2021-20221 virt:av/qemu-kvm: qemu: out-of-bound heap buffer access via an interrupt ID field [rhel-av-8.4.0]) + +* Mon Mar 08 2021 Danilo Cesar Lemes de Paula - 5.2.0-11.el8 +- kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch [bz#1932190] +- kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch [bz#1932190] +- kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch [bz#1935071] +- kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch [bz#1935071] +- Resolves: bz#1932190 + (Timeout when dump the screen from 2nd VGA) +- Resolves: bz#1935071 + (CVE-2021-20263 virt:8.4/qemu-kvm: QEMU: virtiofsd: 'security.capabilities' is not dropped with xattrmap option [rhel-av-8]) + +* Wed Mar 03 2021 Danilo Cesar Lemes de Paula - 5.2.0-10.el8 +- 
kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch [bz#1930757] +- kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch [bz#1930757] +- kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch [bz#1930757] +- kvm-failover-fix-indentantion.patch [bz#1819991] +- kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch [bz#1819991] +- kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch [bz#1819991] +- kvm-failover-Remove-unused-parameter.patch [bz#1819991] +- kvm-failover-Remove-external-partially_hotplugged-proper.patch [bz#1819991] +- kvm-failover-qdev_device_add-returns-err-or-dev-set.patch [bz#1819991] +- kvm-failover-Rename-bool-to-failover_primary_hidden.patch [bz#1819991] +- kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch [bz#1819991] +- kvm-failover-Remove-primary_device_opts.patch [bz#1819991] +- kvm-failover-remove-standby_id-variable.patch [bz#1819991] +- kvm-failover-Remove-primary_device_dict.patch [bz#1819991] +- kvm-failover-Remove-memory-leak.patch [bz#1819991] +- kvm-failover-simplify-virtio_net_find_primary.patch [bz#1819991] +- kvm-failover-should_be_hidden-should-take-a-bool.patch [bz#1819991] +- kvm-failover-Rename-function-to-hide_device.patch [bz#1819991] +- kvm-failover-virtio_net_connect_failover_devices-does-no.patch [bz#1819991] +- kvm-failover-Rename-to-failover_find_primary_device.patch [bz#1819991] +- kvm-failover-simplify-qdev_device_add-failover-case.patch [bz#1819991] +- kvm-failover-simplify-qdev_device_add.patch [bz#1819991] +- kvm-failover-make-sure-that-id-always-exist.patch [bz#1819991] +- kvm-failover-remove-failover_find_primary_device-error-p.patch [bz#1819991] +- kvm-failover-split-failover_find_primary_device_id.patch [bz#1819991] +- kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch [bz#1819991] +- kvm-failover-Caller-of-this-two-functions-already-have-p.patch [bz#1819991] +- kvm-failover-simplify-failover_unplug_primary.patch [bz#1819991] +- 
kvm-failover-Remove-primary_dev-member.patch [bz#1819991] +- kvm-virtio-net-add-missing-object_unref.patch [bz#1819991] +- kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch [bz#1926785] +- kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch [bz#1926785] +- Resolves: bz#1930757 + (Allow control of block-dirty-bitmap persistence via 'block-bitmap-mapping') +- Resolves: bz#1819991 + (Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug) +- Resolves: bz#1926785 + ([RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Fast Train) + +* Mon Mar 01 2021 Danilo Cesar Lemes de Paula - 5.2.0-9.el8 +- kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch [bz#1901323] +- kvm-docs-add-qemu-storage-daemon-1-man-page.patch [bz#1901323] +- kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch [bz#1901323] +- kvm-qemu-storage-daemon-Enable-object-add.patch [bz#1901323] +- kvm-spec-Package-qemu-storage-daemon.patch [bz#1901323] +- kvm-default-configs-Enable-vhost-user-blk.patch [bz#1930033] +- kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch [bz#1925345] +- kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch [bz#1917654] +- Resolves: bz#1901323 + (QSD (QEMU Storage Daemon): basic support - TechPreview) +- Resolves: bz#1930033 + (enable vhost-user-blk device) +- Resolves: bz#1925345 + (qemu-nbd needs larger backlog for Unix socket listen()) +- Resolves: bz#1917654 + ([failover vf migration][RHEL84 vm] After start a vm with a failover vf + a failover virtio net device, the failvoer vf do not exist in the vm) + +* Fri Feb 19 2021 Eduardo Lima (Etrunko) - 5.2.0-8.el8 +- kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch [bz#1887883] +- kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch [bz#1887883] +- kvm-nbd-make-nbd_read-return-EIO-on-error.patch [bz#1887883] +- kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch [bz#1907255] +- 
kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch [bz#1920740] +- kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch [bz#1920740] +- kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch [bz#1920740] +- kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch [bz#1920941] +- kvm-pci-reject-too-large-ROMs.patch [bz#1917830] +- kvm-pci-add-romsize-property.patch [bz#1917830] +- kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch [bz#1917826] +- kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch [bz#1880299] +- Resolves: bz#1887883 + (qemu blocks client progress with various NBD actions) +- Resolves: bz#1907255 + (Migrate failed with vhost-vsock-pci from RHEL-AV 8.3.1 to RHEL-AV 8.2.1) +- Resolves: bz#1920740 + (CVE-2020-35517 virt:8.4/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-av-8.4.0]) +- Resolves: bz#1920941 + ([ppc64le] [AV]--disk cdimage.iso,bus=usb fails to boot) +- Resolves: bz#1917830 + (Add romsize property to qemu-kvm) +- Resolves: bz#1917826 + (Add extra device support to qemu-kvm, but not to rhel machine types) +- Resolves: bz#1880299 + (vhost-user mq connection fails to restart after kill host testpmd which acts as vhost-user client) + +* Fri Feb 12 2021 Eduardo Lima (Etrunko) - 5.2.0-7.el8 +- kvm-virtio-Add-corresponding-memory_listener_unregister-.patch [bz#1903521] +- kvm-block-Honor-blk_set_aio_context-context-requirements.patch [bz#1918966 bz#1918968] +- kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch [bz#1918966 bz#1918968] +- kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch [bz#1918966 bz#1918968] +- kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch [bz#1918966 bz#1918968] +- kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch [bz#1918966 bz#1918968] +- Resolves: bz#1903521 + (hot unplug vhost-user cause qemu crash: qemu-kvm: ../softmmu/memory.c:2818: do_address_space_destroy: Assertion `QTAILQ_EMPTY(&as->listeners)' failed.) 
+- Resolves: bz#1918966 + ([incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'") +- Resolves: bz#1918968 + ([incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all()) + +* Tue Feb 09 2021 Eduardo Lima (Etrunko) - 5.2.0-6.el8 +- kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch [bz#1854811] +- kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch [bz#1907264] +- kvm-redhat-moving-all-documentation-files-to-qemu-kvm-do.patch [bz#1881170 bz#1924766] +- kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch [bz#1834152] +- kvm-redhat-makes-qemu-respect-system-s-crypto-profile.patch [bz#1902219] +- kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch [bz#1925028] +- kvm-docs-set-CONFDIR-when-running-sphinx.patch [bz#1902537] +- Resolves: bz#1854811 + (scsi-bus.c: use-after-free due to race between device unplug and I/O operation causes guest crash) +- Resolves: bz#1907264 + (systemtap: invalid or missing conversion specifier at the trace event vhost_vdpa_set_log_base) +- Resolves: bz#1881170 + (split documentation from the qemu-kvm-core package to its own subpackage) +- Resolves: bz#1924766 + (split documentation from the qemu-kvm-core package to its own subpackage [av-8.4.0]) +- Resolves: bz#1834152 + ([aarch64] QEMU SMMUv3 device: Support range invalidation) +- Resolves: bz#1902219 + (QEMU doesn't honour system crypto policies) +- Resolves: bz#1925028 + (vsmmuv3/vhost and virtio-iommu/vhost regression) +- Resolves: bz#1902537 + (The default fsfreeze-hook path from man page and qemu-ga --help command are different) + +* Tue Feb 02 2021 Eduardo Lima (Etrunko) - 5.2.0-5.el8 +- kvm-spapr-Allow-memory-unplug-to-always-succeed.patch [bz#1914069] +- kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch [bz#1914069] +- kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch [bz#1838738] +- 
kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch [bz#1904268] +- kvm-config-enable-VFIO_CCW.patch [bz#1922170] +- Resolves: bz#1914069 + ([ppc64le] have this fix for rhel8.4 av (spapr: Allow memory unplug to always succeed)) +- Resolves: bz#1838738 + ([Intel 8.4 FEAT] qemu-kvm Sapphire Rapids (SPR) New Instructions (NIs) - Fast Train) +- Resolves: bz#1904268 + ([RFE] [HPEMC] qemu-kvm: support up to 710 VCPUs) +- Resolves: bz#1922170 + (Enable vfio-ccw in AV) + +* Wed Jan 27 2021 Danilo Cesar Lemes de Paula - 5.2.0-4.el8 +- kvm-Drop-bogus-IPv6-messages.patch [bz#1918061] +- Resolves: bz#1918061 + (CVE-2020-10756 virt:rhel/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + +* Mon Jan 18 2021 Danilo Cesar Lemes de Paula - 5.2.0-3.el8 +- kvm-block-nvme-Implement-fake-truncate-coroutine.patch [bz#1848834] +- kvm-spec-find-system-python-via-meson.patch [bz#1899619] +- kvm-build-system-use-b_staticpic-false.patch [bz#1899619] +- kvm-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch [bz#1908693] +- kvm-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch [bz#1912846] +- kvm-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch [bz#1755075] +- kvm-AArch64-machine-types-cleanup.patch [bz#1895276] +- kvm-hw-arm-virt-Add-8.4-Machine-type.patch [bz#1895276] +- kvm-udev-kvm-check-remove-the-exceeded-subscription-limi.patch [bz#1914463] +- kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch [bz#1845758] +- kvm-memory-Add-IOMMUTLBEvent.patch [bz#1845758] +- kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch [bz#1845758] +- kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch [bz#1845758] +- kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch [bz#1845758] +- kvm-RHEL-Switch-pvpanic-test-to-q35.patch [bz#1885555] +- kvm-8.4-x86-machine-type.patch [bz#1885555] +- kvm-memory-clamp-cached-translation-in-case-it-points-to.patch [bz#1904392] +- Resolves: 
bz#1848834 + (Failed to create luks format image on NVMe device) +- Resolves: bz#1899619 + (QEMU 5.2 is built with PIC objects instead of PIE) +- Resolves: bz#1908693 + ([ppc64le]boot up a guest with 128 numa nodes ,qemu got coredump) +- Resolves: bz#1912846 + (qemu-kvm: Failed to load xhci:parent_obj during migration) +- Resolves: bz#1755075 + ([qemu-guest-agent] fsinfo doesn't return disk info on s390x) +- Resolves: bz#1895276 + (Machine types update for aarch64 for QEMU 5.2.0) +- Resolves: bz#1914463 + (Remove KVM guest count and limit info message) +- Resolves: bz#1845758 + (qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed.) +- Resolves: bz#1885555 + (8.4 machine types for x86) +- Resolves: bz#1904392 + (CVE-2020-27821 virt:8.4/qemu-kvm: QEMU: heap buffer overflow in msix_table_mmio_write() in hw/pci/msix.c [rhel-av-8]) + +* Tue Dec 15 2020 Danilo Cesar Lemes de Paula - 5.2.0-2.el8 +- kvm-redhat-Define-hw_compat_8_3.patch [bz#1893935] +- kvm-redhat-Add-spapr_machine_rhel_default_class_options.patch [bz#1893935] +- kvm-redhat-Define-pseries-rhel8.4.0-machine-type.patch [bz#1893935] +- kvm-redhat-s390x-add-rhel-8.4.0-compat-machine.patch [bz#1836282] +- Resolves: bz#1836282 + (New machine type for qemu-kvm on s390x in RHEL-AV) +- Resolves: bz#1893935 + (New machine type on RHEL-AV 8.4 for ppc64le) + +* Wed Dec 09 2020 Miroslav Rezanina - 5.2.0-1.el8 +- Rebase to QEMU 5.2.0 [bz#1905933] +- Resolves: bz#1905933 + (Rebase qemu-kvm to version 5.2.0) + +* Tue Dec 01 2020 Danilo Cesar Lemes de Paula - 5.1.0-16.el8 +- kvm-redhat-introduces-disable_everything-macro-into-the-.patch [bz#1884611] +- kvm-redhat-scripts-extract_build_cmd.py-Avoid-listing-em.patch [bz#1884611] +- kvm-redhat-Removing-unecessary-configurations.patch [bz#1884611] +- kvm-redhat-Fixing-rh-local-build.patch [bz#1884611] +- 
kvm-redhat-allow-Makefile-rh-prep-builddep-to-fail.patch [bz#1884611] +- kvm-redhat-adding-rh-rpm-target.patch [bz#1884611] +- kvm-redhat-move-shareable-files-from-qemu-kvm-core-to-qe.patch [bz#1884611] +- kvm-redhat-Add-qemu-kiwi-subpackage.patch [bz#1884611] +- Resolves: bz#1884611 + (Build kata-specific version of qemu) + +* Mon Nov 16 2020 Danilo Cesar Lemes de Paula - 5.1.0-15.el8 +- kvm-redhat-add-un-pre-install-systemd-hooks-for-qemu-ga.patch [bz#1882719] +- kvm-rcu-Implement-drain_call_rcu.patch [bz#1812399 bz#1866707] +- kvm-libqtest-Rename-qmp_assert_error_class-to-qmp_expect.patch [bz#1812399 bz#1866707] +- kvm-qtest-rename-qtest_qmp_receive-to-qtest_qmp_receive_.patch [bz#1812399 bz#1866707] +- kvm-qtest-Reintroduce-qtest_qmp_receive-with-QMP-event-b.patch [bz#1812399 bz#1866707] +- kvm-qtest-remove-qtest_qmp_receive_success.patch [bz#1812399 bz#1866707] +- kvm-device-plug-test-use-qtest_qmp-to-send-the-device_de.patch [bz#1812399 bz#1866707] +- kvm-qtest-switch-users-back-to-qtest_qmp_receive.patch [bz#1812399 bz#1866707] +- kvm-qtest-check-that-drives-are-really-appearing-and-dis.patch [bz#1812399 bz#1866707] +- kvm-qemu-iotests-qtest-rewrite-test-067-as-a-qtest.patch [bz#1812399 bz#1866707] +- kvm-qdev-add-check-if-address-free-callback-for-buses.patch [bz#1812399 bz#1866707] +- kvm-scsi-scsi_bus-switch-search-direction-in-scsi_device.patch [bz#1812399 bz#1866707] +- kvm-device_core-use-drain_call_rcu-in-in-qmp_device_add.patch [bz#1812399 bz#1866707] +- kvm-device-core-use-RCU-for-list-of-children-of-a-bus.patch [bz#1812399 bz#1866707] +- kvm-scsi-switch-to-bus-check_address.patch [bz#1812399 bz#1866707] +- kvm-device-core-use-atomic_set-on-.realized-property.patch [bz#1812399 bz#1866707] +- kvm-scsi-scsi-bus-scsi_device_find-don-t-return-unrealiz.patch [bz#1812399] +- kvm-scsi-scsi_bus-Add-scsi_device_get.patch [bz#1812399 bz#1866707] +- kvm-virtio-scsi-use-scsi_device_get.patch [bz#1812399 bz#1866707] +- 
kvm-scsi-scsi_bus-fix-races-in-REPORT-LUNS.patch [bz#1812399 bz#1866707] +- kvm-tests-migration-fix-memleak-in-wait_command-wait_com.patch [bz#1812399 bz#1866707] +- kvm-libqtest-fix-the-order-of-buffered-events.patch [bz#1812399 bz#1866707] +- kvm-libqtest-fix-memory-leak-in-the-qtest_qmp_event_ref.patch [bz#1812399 bz#1866707] +- kvm-iotests-add-filter_qmp_virtio_scsi-function.patch [bz#1812399 bz#1866707] +- kvm-iotests-rewrite-iotest-240-in-python.patch [bz#1812399 bz#1866707] +- Resolves: bz#1812399 + (Qemu crash when detach disk with cache="none" discard="ignore" io="native") +- Resolves: bz#1866707 + (qemu-kvm is crashing with error "scsi_target_emulate_report_luns: Assertion `i == n + 8' failed") +- Resolves: bz#1882719 + (qemu-ga service still active and can work after qemu-guest-agent been removed) + +* Tue Oct 13 2020 Danilo Cesar Lemes de Paula - 5.1.0-14.el8_3 +- kvm-virtiofsd-avoid-proc-self-fd-tempdir.patch [bz#1884276] +- Resolves: bz#1884276 + (Pod with kata-runtime won't start, QEMU: "vhost_user_dev init failed, Operation not permitted" [mkdtemp failing in sandboxing]) + +* Thu Oct 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-13.el8_3 +- kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch [bz#1846886] +- kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch [bz#1846886] +- kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch [bz#1846886] +- Resolves: bz#1846886 + (Guest hit soft lockup or reboots if hotplug vcpu under ovmf) + +* Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-12.el8_3 +- kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch [bz#1868449] +- kvm-vhost-vsock-pci-force-virtio-version-1.patch [bz#1868449] +- kvm-vhost-user-vsock-pci-force-virtio-version-1.patch [bz#1868449] +- kvm-vhost-vsock-ccw-force-virtio-version-1.patch [bz#1868449] +- Resolves: bz#1868449 + (vhost_vsock error: device is modern-only, use disable-legacy=on) + +* Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-11.el8_3 
+- kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch [bz#1874004] +- kvm-redhat-Make-all-generated-so-files-executable-not-on.patch [bz#1876635] +- Resolves: bz#1874004 + (Live migration performance is poor during guest installation process on power host) +- Resolves: bz#1876635 + (VM fails to start with a passthrough smartcard) + +* Mon Sep 28 2020 Danilo Cesar Lemes de Paula - 5.1.0-10.el8 +- kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch [bz#1877209] +- Resolves: bz#1877209 + ('qemu-img bitmaps --merge' failed when trying to merge top volume bitmap to base volume bitmap) + +* Mon Sep 21 2020 Danilo Cesar Lemes de Paula - 5.1.0-9.el8 +- kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch [bz#1688978] +- Resolves: bz#1688978 + (RFE: forward host preferences for cipher suites and CA certs to guest firmware) + +* Thu Sep 17 2020 Danilo Cesar Lemes de Paula - 5.1.0-8.el8 +- kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch [bz#1738820] +- kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch [bz#1752376] +- kvm-Revert-Drop-bogus-IPv6-messages.patch [bz#1867075] +- kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch [bz#1821528] +- Resolves: bz#1738820 + ('-F' option of qemu-ga command cause the guest-fsfreeze-freeze command doesn't work) +- Resolves: bz#1752376 + (qemu use SCMP_ACT_TRAP even SCMP_ACT_KILL_PROCESS is available) +- Resolves: bz#1821528 + (missing namespace attribute when access the rbd image with namespace) +- Resolves: bz#1867075 + (CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + +* Tue Sep 15 2020 Danilo Cesar Lemes de Paula - 5.1.0-7.el8 +- kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch [bz#1789757 bz#1870384] +- kvm-target-arm-Move-start-powered-off-property-to-generi.patch [bz#1849483] +- kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch [bz#1849483] +- 
kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch [bz#1849483] +- Resolves: bz#1789757 + ([IBM 8.4 FEAT] Add machine option to enable secure VM support) +- Resolves: bz#1849483 + (Failed to boot up guest when hotplugging vcpus on bios stage) +- Resolves: bz#1870384 + ([IBM 8.3 FEAT] Add interim/unsupported machine option to enable secure VM support for testing purposes) + +* Thu Sep 10 2020 Danilo Cesar Lemes de Paula - 5.1.0-6.el8 +- kvm-spec-Move-qemu-pr-helper-back-to-usr-bin.patch [bz#1869635] +- kvm-Bump-required-libusbx-version.patch [bz#1856591] +- Resolves: bz#1856591 + (libusbx isn't updated with qemu-kvm) +- Resolves: bz#1869635 + ('/usr/bin/qemu-pr-helper' is not a suitable pr helper: No such file or directory) + +* Tue Sep 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-5.el8 +- kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch [bz#1873417] +- kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch [bz#1873417] +- kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch [bz#1873417] +- kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch [bz#1873417] +- kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch [bz#1873417] +- kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch [bz#1873417] +- kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch [bz#1873417] +- kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch [bz#1867739] +- kvm-usb-fix-setup_len-init-CVE-2020-14364.patch [bz#1869715] +- kvm-Remove-explicit-glusterfs-api-dependency.patch [bz#1872853] +- kvm-disable-virgl.patch [bz#1831271] +- Resolves: bz#1831271 + (Drop virgil acceleration support and remove virglrenderer dependency) +- Resolves: bz#1867739 + (-prom-env does not validate input) +- Resolves: bz#1869715 + (CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-av-8.3.0]) +- Resolves: bz#1872853 + (move the glusterfs dependency out of qemu-kvm-core to the glusterfs 
module) +- Resolves: bz#1873417 + (AMD/NUMA topology - revert 5.1 changes) + +* Thu Aug 27 2020 Danilo Cesar Lemes de Paula - 5.1.0-4.el8 +- kvm-Drop-bogus-IPv6-messages.patch [bz#1867075] +- kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch [bz#1849707] +- kvm-machine_types-numa-compatibility-for-auto_enable_num.patch [bz#1849707] +- kvm-migration-Add-block-bitmap-mapping-parameter.patch [bz#1790492] +- kvm-iotests.py-Let-wait_migration-return-on-failure.patch [bz#1790492] +- kvm-iotests-Test-node-bitmap-aliases-during-migration.patch [bz#1790492] +- Resolves: bz#1790492 + ('dirty-bitmaps' migration capability should allow configuring target nodenames) +- Resolves: bz#1849707 + (8.3 machine types for x86 - 5.1 update) +- Resolves: bz#1867075 + (CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + +* Wed Aug 19 2020 Danilo Cesar Lemes de Paula - 5.1.0-3.el8 +- kvm-redhat-Update-hw_compat_8_2.patch [bz#1843348] +- kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch [bz#1843348] +- kvm-Disable-TPM-passthrough-backend-on-ARM.patch [bz#1801242] +- kvm-Require-libfdt-1.6.0.patch [bz#1867847] +- Resolves: bz#1801242 + ([aarch64] vTPM support in machvirt) +- Resolves: bz#1843348 + (8.3 machine types for POWER) +- Resolves: bz#1867847 + ([ppc] virt module 7629: /usr/libexec/qemu-kvm: undefined symbol: fdt_check_full, version LIBFDT_1.2) + +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-2.el8 +- kvm-redhat-define-hw_compat_8_2.patch [bz#1853265] +- Resolves: bz#1853265 + (Forward and backward migration from rhel-av-8.3.0(qemu-kvm-5.0.0) to rhel-av-8.2.1(qemu-kvm-4.2.0) failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'") + +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-1.el8 +- Quick changelog fix to reflect the current fixes: +- Resolve: bz#1781911 +- Resolve: bz#1841529 +- Resolve: bz#1842902 +- Resolve: bz#1818843 +- Resolve: 
bz#1819292 +- Resolve: bz#1801242 + +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-0.el8 +- Rebase to 5.1.0 +- Resolves: bz#1809650 + +* Tue Jul 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-29.el8 +- kvm-virtio-net-fix-removal-of-failover-device.patch [bz#1820120] +- Resolves: bz#1820120 + (After hotunplugging the vitrio device and netdev, hotunpluging the failover VF will cause qemu core dump) + +* Sun Jun 28 2020 Danilo Cesar Lemes de Paula - 4.2.0-28.el8 +- kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch [bz#1812765] +- kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch [bz#1812765] +- kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch [bz#1838082] +- Resolves: bz#1812765 + (qemu with iothreads enabled crashes on resume after enospc pause for disk extension) +- Resolves: bz#1838082 + (CVE-2020-1983 virt:8.2/qemu-kvm: QEMU: slirp: use-after-free in ip_reass() function in ip_input.c [rhel-av-8]) + +* Thu Jun 18 2020 Eduardo Lima (Etrunko) - 4.2.0-27.el8 +- kvm-hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch [bz#1820531] +- kvm-spec-Fix-python-shenigans-for-tests.patch [bz#1845779] +- kvm-target-i386-Add-ARCH_CAPABILITIES-related-bits-into-.patch [bz#1840342] +- Resolves: bz#1820531 + (qmp command query-pci get wrong result after hotplug device under hotplug=off controller) +- Resolves: bz#1840342 + ([Intel 8.2.1 Bug] qemu-kvm Add ARCH_CAPABILITIES to Icelake-Server cpu model - Fast Train) +- Resolves: bz#1845779 + (Install 'qemu-kvm-tests' failed as nothing provides /usr/libexec/platform-python3 - virt module 6972) + +* Wed Jun 17 2020 Eduardo Lima (Etrunko) - 4.2.0-26.el8 +- kvm-nbd-server-Avoid-long-error-message-assertions-CVE-2.patch [bz#1845384] +- kvm-block-Call-attention-to-truncation-of-long-NBD-expor.patch [bz#1845384] +- Resolves: bz#1845384 + (CVE-2020-10761 virt:8.2/qemu-kvm: QEMU: nbd: reachable assertion failure in nbd_negotiate_send_rep_verr via remote client [rhel-av-8]) + +* Tue Jun 09 2020 Danilo 
Cesar Lemes de Paula - 4.2.0-25.el8 +- kvm-enable-ramfb.patch [bz#1841068] +- kvm-block-Add-flags-to-BlockDriver.bdrv_co_truncate.patch [bz#1780574] +- kvm-block-Add-flags-to-bdrv-_co-_truncate.patch [bz#1780574] +- kvm-block-backend-Add-flags-to-blk_truncate.patch [bz#1780574] +- kvm-qcow2-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-raw-format-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-file-posix-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-block-truncate-Don-t-make-backing-file-data-visible.patch [bz#1780574] +- kvm-iotests-Add-qemu_io_log.patch [bz#1780574] +- kvm-iotests-Filter-testfiles-out-in-filter_img_info.patch [bz#1780574] +- kvm-iotests-Test-committing-to-short-backing-file.patch [bz#1780574] +- kvm-qcow2-Forward-ZERO_WRITE-flag-for-full-preallocation.patch [bz#1780574] +- kvm-i386-Add-MSR-feature-bit-for-MDS-NO.patch [bz#1769912] +- kvm-i386-Add-macro-for-stibp.patch [bz#1769912] +- kvm-target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch [bz#1769912] +- kvm-i386-Add-new-CPU-model-Cooperlake.patch [bz#1769912] +- kvm-target-i386-Add-missed-features-to-Cooperlake-CPU-mo.patch [bz#1769912] +- Resolves: bz#1769912 + ([Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train) +- Resolves: bz#1780574 + (Data corruption with resizing short overlay over longer backing files) +- Resolves: bz#1841068 + (RFE: please support the "ramfb" display device model) + +* Mon Jun 08 2020 Danilo Cesar Lemes de Paula - 4.2.0-24.el8 +- kvm-target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch [bz#1513681] +- kvm-block-curl-HTTP-header-fields-allow-whitespace-aroun.patch [bz#1841038] +- kvm-block-curl-HTTP-header-field-names-are-case-insensit.patch [bz#1841038] +- kvm-MAINTAINERS-fix-qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch [bz#1779893 bz#1779904] +- kvm-iotests-Let-_make_test_img-parse-its-parameters.patch [bz#1779893 bz#1779904] +- 
kvm-qemu_img-add-cvtnum_full-to-print-error-reports.patch [bz#1779893 bz#1779904] +- kvm-block-Make-it-easier-to-learn-which-BDS-support-bitm.patch [bz#1779893 bz#1779904] +- kvm-blockdev-Promote-several-bitmap-functions-to-non-sta.patch [bz#1779893 bz#1779904] +- kvm-blockdev-Split-off-basic-bitmap-operations-for-qemu-.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Add-bitmap-sub-command.patch [bz#1779893 bz#1779904] +- kvm-iotests-Fix-test-178.patch [bz#1779893 bz#1779904] +- kvm-qcow2-Expose-bitmaps-size-during-measure.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Factor-out-code-for-merging-bitmaps.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Add-convert-bitmaps-option.patch [bz#1779893 bz#1779904] +- kvm-iotests-Add-test-291-to-for-qemu-img-bitmap-coverage.patch [bz#1779893 bz#1779904] +- kvm-iotests-Add-more-skip_if_unsupported-statements-to-t.patch [bz#1778593] +- kvm-iotests-don-t-use-format-for-drive_add.patch [bz#1778593] +- kvm-iotests-055-refactor-compressed-backup-to-vmdk.patch [bz#1778593] +- kvm-iotests-055-skip-vmdk-target-tests-if-vmdk-is-not-wh.patch [bz#1778593] +- kvm-backup-Improve-error-for-bdrv_getlength-failure.patch [bz#1778593] +- kvm-backup-Make-sure-that-source-and-target-size-match.patch [bz#1778593] +- kvm-iotests-Backup-with-different-source-target-size.patch [bz#1778593] +- kvm-iotests-109-Don-t-mirror-with-mismatched-size.patch [bz#1778593] +- kvm-iotests-229-Use-blkdebug-to-inject-an-error.patch [bz#1778593] +- kvm-mirror-Make-sure-that-source-and-target-size-match.patch [bz#1778593] +- kvm-iotests-Mirror-with-different-source-target-size.patch [bz#1778593] +- Resolves: bz#1513681 + ([Intel 8.2.1 Feat] qemu-kvm PT VMX -- Fast Train) +- Resolves: bz#1778593 + (Qemu coredump when backup to a existing small size image) +- Resolves: bz#1779893 + (RFE: Copy bitmaps with qemu-img convert) +- Resolves: bz#1779904 + (RFE: ability to estimate bitmap space utilization for qcow2) +- Resolves: bz#1841038 + (qemu-img: 
/var/tmp/v2vovl56bced.qcow2: CURL: Error opening file: Server does not support 'range' (byte ranges) with HTTP/2 server in VMware ESXi 7) + +* Thu Jun 04 2020 Danilo Cesar Lemes de Paula - 4.2.0-23.el8 +- kvm-target-arm-Fix-PAuth-sbox-functions.patch [bz#1813940] +- kvm-Don-t-leak-memory-when-reallocation-fails.patch [bz#1749737] +- kvm-Replace-remaining-malloc-free-user-with-glib.patch [bz#1749737] +- kvm-Revert-RHEL-disable-hostmem-memfd.patch [bz#1839030] +- kvm-block-introducing-bdrv_co_delete_file-interface.patch [bz#1827630] +- kvm-block.c-adding-bdrv_co_delete_file.patch [bz#1827630] +- kvm-crypto.c-cleanup-created-file-when-block_crypto_co_c.patch [bz#1827630] +- Resolves: bz#1749737 + (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8]) +- Resolves: bz#1813940 + (CVE-2020-10702 virt:8.1/qemu-kvm: qemu: weak signature generation in Pointer Authentication support for ARM [rhel-av-8]) +- Resolves: bz#1827630 + (volume creation leaving uncleaned stuff behind on error (vol-clone/libvirt/qemu-kvm)) +- Resolves: bz#1839030 + (RFE: enable the "memfd" memory backend) + +* Mon May 25 2020 Danilo Cesar Lemes de Paula - 4.2.0-22.el8 +- kvm-block-always-fill-entire-LUKS-header-space-with-zero.patch [bz#1775462] +- kvm-numa-remove-not-needed-check.patch [bz#1600217] +- kvm-numa-properly-check-if-numa-is-supported.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-initiator-information-for.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch [bz#1600217] +- kvm-hmat-acpi-Build-Memory-Proximity-Domain-Attributes-S.patch [bz#1600217] +- kvm-hmat-acpi-Build-System-Locality-Latency-and-Bandwidt.patch [bz#1600217] +- kvm-hmat-acpi-Build-Memory-Side-Cache-Information-Struct.patch [bz#1600217] +- kvm-tests-numa-Add-case-for-QMP-build-HMAT.patch [bz#1600217] +- kvm-tests-bios-tables-test-add-test-cases-for-ACPI-HMAT.patch 
[bz#1600217] +- kvm-ACPI-add-expected-files-for-HMAT-tests-acpihmat.patch [bz#1600217] +- Resolves: bz#1600217 + ([Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train) +- Resolves: bz#1775462 + (Creating luks-inside-qcow2 images with cluster_size=2k/4k will get a corrupted image) + +* Mon May 11 2020 Danilo Cesar Lemes de Paula - 4.2.0-21.el8 +- kvm-hw-pci-pcie-Forbid-hot-plug-if-it-s-disabled-on-the-.patch [bz#1820531] +- kvm-hw-pci-pcie-Replace-PCI_DEVICE-casts-with-existing-v.patch [bz#1820531] +- kvm-tools-virtiofsd-passthrough_ll-Fix-double-close.patch [bz#1817445] +- kvm-virtiofsd-add-rlimit-nofile-NUM-option.patch [bz#1817445] +- kvm-virtiofsd-stay-below-fs.file-max-sysctl-value-CVE-20.patch [bz#1817445] +- kvm-virtiofsd-jail-lo-proc_self_fd.patch [bz#1817445] +- kvm-virtiofsd-Show-submounts.patch [bz#1817445] +- kvm-virtiofsd-only-retain-file-system-capabilities.patch [bz#1817445] +- kvm-virtiofsd-drop-all-capabilities-in-the-wait-parent-p.patch [bz#1817445] +- Resolves: bz#1817445 + (CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8]) +- Resolves: bz#1820531 + (qmp command query-pci get wrong result after hotplug device under hotplug=off controller) + +* Fri May 01 2020 Jon Maloy - 4.2.0-20.el8 +- kvm-pcie_root_port-Add-hotplug-disabling-option.patch [bz#1790899] +- kvm-compat-disable-edid-for-virtio-gpu-ccw.patch [bz#1816793] +- Resolves: bz#1790899 + ([RFE] QEMU devices should have the option to enable/disable hotplug/unplug) +- Resolves: bz#1816793 + ('edid' compat handling missing for virtio-gpu-ccw) + +* Tue Apr 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-19.el8_2 +- kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch [bz#1822682] +- Resolves: bz#1822682 + (QEMU-4.2 fails to start a VM on Azure) + +* Thu Apr 09 2020 Danilo Cesar Lemes de Paula - 4.2.0-18.el8_2 +- kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch [bz#1817621] +- 
kvm-replication-assert-we-own-context-before-job_cancel_.patch [bz#1817621] +- kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch [bz#1817621] +- kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch [bz#1817621] +- kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch [bz#1817621] +- kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch [bz#1817621] +- Resolves: bz#1817621 + (Crash and deadlock with block jobs when using io-threads) + +* Mon Mar 30 2020 Danilo Cesar Lemes de Paula - 4.2.0-17.el8 +- kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch [bz#1816007] +- kvm-block-trickle-down-the-fallback-image-creation-funct.patch [bz#1816007] +- kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch [bz#1794692] +- kvm-mirror-Wait-only-for-in-flight-operations.patch [bz#1794692] +- Resolves: bz#1794692 + (Mirror block job stops making progress) +- Resolves: bz#1816007 + (qemu-img convert failed to convert with block device as target) + +* Tue Mar 24 2020 Danilo Cesar Lemes de Paula - 4.2.0-16.el8 +- kvm-migration-Rate-limit-inside-host-pages.patch [bz#1814336] +- kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch [bz#1811670] +- Resolves: bz#1811670 + (Unneeded qemu-guest-agent dependency on pixman) +- Resolves: bz#1814336 + ([POWER9] QEMU migration-test triggers a kernel warning) + +* Tue Mar 17 2020 Danilo Cesar Lemes de Paula - 4.2.0-15.el8 +- kvm-block-nbd-Fix-hang-in-.bdrv_close.patch [bz#1640894] +- kvm-block-Generic-file-creation-fallback.patch [bz#1640894] +- kvm-file-posix-Drop-hdev_co_create_opts.patch [bz#1640894] +- kvm-iscsi-Drop-iscsi_co_create_opts.patch [bz#1640894] +- kvm-iotests-Add-test-for-image-creation-fallback.patch [bz#1640894] +- kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch [bz#1640894] +- kvm-iotests-Use-complete_and_wait-in-155.patch [bz#1790482 bz#1805143] +- kvm-block-Introduce-bdrv_reopen_commit_post-step.patch [bz#1790482 bz#1805143] +- 
kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch [bz#1790482 bz#1805143] +- kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch [bz#1790482 bz#1805143] +- kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch [bz#1790482 bz#1805143] +- kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch [bz#1790482 bz#1805143] +- kvm-block-Make-bdrv_get_cumulative_perm-public.patch [bz#1790482 bz#1805143] +- kvm-block-Relax-restrictions-for-blockdev-snapshot.patch [bz#1790482 bz#1805143] +- kvm-iotests-Fix-run_job-with-use_log-False.patch [bz#1790482 bz#1805143] +- kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch [bz#1790482 bz#1805143] +- kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch [bz#1790482 bz#1805143] +- kvm-iotests-Add-iothread-cases-to-155.patch [bz#1790482 bz#1805143] +- kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch [bz#1790482 bz#1805143] +- kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch [bz#1809380] +- Resolves: bz#1640894 + (Fix generic file creation fallback for qemu-img nvme:// image creation support) +- Resolves: bz#1790482 + (bitmaps in backing images can't be modified) +- Resolves: bz#1805143 + (allow late/lazy opening of backing chain for shallow blockdev-mirror) +- Resolves: bz#1809380 + (guest hang during reboot process after migration from RHEL7.8 to RHEL8.2.0.) 
+ +* Wed Mar 11 2020 Danilo Cesar Lemes de Paula - 4.2.0-14.el8 +- kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch [bz#1782529] +- kvm-migration-multifd-clean-pages-after-filling-packet.patch [bz#1738451] +- kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch [bz#1738451] +- kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch [bz#1738451] +- kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch [bz#1738451] +- kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch [bz#1738451] +- kvm-qemu-file-Don-t-do-IO-after-shutdown.patch [bz#1738451] +- kvm-migration-Don-t-send-data-if-we-have-stopped.patch [bz#1738451] +- kvm-migration-Create-migration_is_running.patch [bz#1738451] +- kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch [bz#1738451] +- kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch [bz#1738451] +- kvm-virtiofsd-Remove-fuse_req_getgroups.patch [bz#1797064] +- kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch [bz#1797064] +- kvm-virtiofsd-load_capng-missing-unlock.patch [bz#1797064] +- kvm-virtiofsd-do_read-missing-NULL-check.patch [bz#1797064] +- kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch [bz#1797064] +- kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch [bz#1797064] +- kvm-virtiofsd-Fix-xattr-operations.patch [bz#1797064] +- Resolves: bz#1738451 + (qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel)) +- Resolves: bz#1782529 + (Windows Update Enablement with default smbios strings in qemu) +- Resolves: bz#1797064 + (virtiofsd: Fixes) + +* Sat Feb 29 2020 Danilo Cesar Lemes de Paula - 4.2.0-13.el8 +- kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch [bz#1791648] +- kvm-target-i386-add-a-ucode-rev-property.patch [bz#1791648] +- kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch [bz#1791648] +- kvm-target-i386-fix-TCG-UCODE_REV-access.patch [bz#1791648] 
+- kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch [bz#1791648] +- kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch [bz#1791648] +- kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch [bz#1703907] +- kvm-mirror-Store-MirrorOp.co-for-debuggability.patch [bz#1794692] +- kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch [bz#1794692] +- Resolves: bz#1703907 + ([upstream]QEMU coredump when converting to qcow2: external data file images on block devices with copy_offloading) +- Resolves: bz#1791648 + ([RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough) +- Resolves: bz#1794692 + (Mirror block job stops making progress) + +* Mon Feb 24 2020 Danilo Cesar Lemes de Paula - 4.2.0-12.el8 +- kvm-vhost-user-gpu-Drop-trailing-json-comma.patch [bz#1805334] +- Resolves: bz#1805334 + (vhost-user/50-qemu-gpu.json is not valid JSON) + +* Sun Feb 23 2020 Danilo Cesar Lemes de Paula - 4.2.0-11.el8 +- kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch [bz#1796240] +- kvm-util-add-slirp_fmt-helpers.patch [bz#1798994] +- kvm-tcp_emu-fix-unsafe-snprintf-usages.patch [bz#1798994] +- kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch [bz#1791590] +- kvm-virtio-make-virtio_delete_queue-idempotent.patch [bz#1791590] +- kvm-virtio-reset-region-cache-when-on-queue-deletion.patch [bz#1791590] +- kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch [bz#1791590] +- Resolves: bz#1791590 + ([Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device) +- Resolves: bz#1796240 + (Enable hw accelerated cache-count-flush by default for POWER9 DD2.3 cpus) +- Resolves: bz#1798994 + (CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0]) + +* Fri Feb 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-10.el8 +- kvm-i386-Resolve-CPU-models-to-v1-by-default.patch [bz#1779078 bz#1787291 bz#1779078 bz#1779078] +- 
kvm-iotests-Support-job-complete-in-run_job.patch [bz#1781637] +- kvm-iotests-Create-VM.blockdev_create.patch [bz#1781637] +- kvm-block-Activate-recursively-even-for-already-active-n.patch [bz#1781637] +- kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch [bz#1781637] +- kvm-iotests-Test-external-snapshot-with-VM-state.patch [bz#1781637] +- kvm-iotests.py-Let-wait_migration-wait-even-more.patch [bz#1781637] +- kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-backup-top-Begin-drain-earlier.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch [bz#1801320] +- kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch [bz#1801320] +- Resolves: bz#1745606 + (Qemu hang when do incremental live backup in transaction mode without bitmap) +- Resolves: bz#1746217 + (Src qemu 
hang when do storage vm migration during guest installation) +- Resolves: bz#1773517 + (Src qemu hang when do storage vm migration with dataplane enable) +- Resolves: bz#1779036 + (Qemu coredump when do snapshot in transaction mode with one snapshot path not exist) +- Resolves: bz#1779078 + (RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm)) +- Resolves: bz#1781637 + (qemu crashed when do mem and disk snapshot) +- Resolves: bz#1782111 + (Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable)) +- Resolves: bz#1782175 + (Qemu core dump when add persistent bitmap(data plane enable)) +- Resolves: bz#1783965 + (Qemu core dump when do backup with sync: bitmap and no bitmap provided) +- Resolves: bz#1787291 + (RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) [rhel-8.1.0.z]) +- Resolves: bz#1801320 + (aarch64: backport query-cpu-model-expansion and adjvtime document fixes) + +* Mon Feb 10 2020 Danilo Cesar Lemes de Paula - 4.2.0-9.el8 +- kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch [bz#1776638] +- kvm-xics-Don-t-deassert-outputs.patch [bz#1776638] +- kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch [bz#1776638] +- kvm-trace-update-qemu-trace-stap-to-Python-3.patch [bz#1787395] +- kvm-redhat-Remove-redundant-fix-for-qemu-trace-stap.patch [bz#1787395] +- kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch [bz#1794503] +- kvm-tpm-ppi-page-align-PPI-RAM.patch [bz#1787444] +- kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch [bz#1647366] +- kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch [bz#1647366] +- kvm-tests-arm-cpu-features-Check-feature-default-values.patch [bz#1647366] +- kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch 
[bz#1647366] +- kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch [bz#1647366] +- kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch [bz#1529231] +- kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch [bz#1529231] +- kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch [bz#1529231] +- Resolves: bz#1529231 + ([q35] VM hangs after migration with 200 vCPUs) +- Resolves: bz#1647366 + (aarch64: Add support for the kvm-no-adjvtime ARM CPU feature) +- Resolves: bz#1776638 + (Guest failed to boot up after system_reset 20 times) +- Resolves: bz#1787395 + (qemu-trace-stap list : TypeError: startswith first arg must be bytes or a tuple of bytes, not str) +- Resolves: bz#1787444 + (Broken postcopy migration with vTPM device) +- Resolves: bz#1794503 + (CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-av-8.2.0]) + +* Fri Jan 31 2020 Miroslav Rezanina - 4.2.0-8.el8 +- kvm-target-arm-arch_dump-Add-SVE-notes.patch [bz#1725084] +- kvm-vhost-Add-names-to-section-rounded-warning.patch [bz#1779041] +- kvm-vhost-Only-align-sections-for-vhost-user.patch [bz#1779041] +- kvm-vhost-coding-style-fix.patch [bz#1779041] +- kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch [bz#1694164] +- kvm-vhost-user-fs-remove-vhostfd-property.patch [bz#1694164] +- kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch [bz#1694164] +- kvm-virtiofsd-Pull-in-upstream-headers.patch [bz#1694164] +- kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch [bz#1694164] +- kvm-virtiofsd-Add-auxiliary-.c-s.patch [bz#1694164] +- kvm-virtiofsd-Add-fuse_lowlevel.c.patch [bz#1694164] +- kvm-virtiofsd-Add-passthrough_ll.patch [bz#1694164] +- kvm-virtiofsd-Trim-down-imported-files.patch [bz#1694164] +- kvm-virtiofsd-Format-imported-files-to-qemu-style.patch [bz#1694164] +- kvm-virtiofsd-remove-mountpoint-dummy-argument.patch [bz#1694164] +- kvm-virtiofsd-remove-unused-notify-reply-support.patch [bz#1694164] +- 
kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch [bz#1694164] +- kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch [bz#1694164] +- kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch [bz#1694164] +- kvm-virtiofsd-Trim-out-compatibility-code.patch [bz#1694164] +- kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch [bz#1694164] +- kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch [bz#1694164] +- kvm-virtiofsd-Add-options-for-virtio.patch [bz#1694164] +- kvm-virtiofsd-add-o-source-PATH-to-help-output.patch [bz#1694164] +- kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch [bz#1694164] +- kvm-virtiofsd-Start-wiring-up-vhost-user.patch [bz#1694164] +- kvm-virtiofsd-Add-main-virtio-loop.patch [bz#1694164] +- kvm-virtiofsd-get-set-features-callbacks.patch [bz#1694164] +- kvm-virtiofsd-Start-queue-threads.patch [bz#1694164] +- kvm-virtiofsd-Poll-kick_fd-for-queue.patch [bz#1694164] +- kvm-virtiofsd-Start-reading-commands-from-queue.patch [bz#1694164] +- kvm-virtiofsd-Send-replies-to-messages.patch [bz#1694164] +- kvm-virtiofsd-Keep-track-of-replies.patch [bz#1694164] +- kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch [bz#1694164] +- kvm-virtiofsd-Fast-path-for-virtio-read.patch [bz#1694164] +- kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch [bz#1694164] +- kvm-virtiofsd-make-f-foreground-the-default.patch [bz#1694164] +- kvm-virtiofsd-add-vhost-user.json-file.patch [bz#1694164] +- kvm-virtiofsd-add-print-capabilities-option.patch [bz#1694164] +- kvm-virtiofs-Add-maintainers-entry.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch [bz#1694164] +- 
kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch [bz#1694164] +- kvm-virtiofsd-validate-path-components.patch [bz#1694164] +- kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch [bz#1694164] +- kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch [bz#1694164] +- kvm-virtiofsd-add-fuse_mbuf_iter-API.patch [bz#1694164] +- kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch [bz#1694164] +- kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch [bz#1694164] +- kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch [bz#1694164] +- kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch [bz#1694164] +- kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch [bz#1694164] +- kvm-virtiofsd-sandbox-mount-namespace.patch [bz#1694164] +- kvm-virtiofsd-move-to-an-empty-network-namespace.patch [bz#1694164] +- kvm-virtiofsd-move-to-a-new-pid-namespace.patch [bz#1694164] +- kvm-virtiofsd-add-seccomp-whitelist.patch [bz#1694164] +- kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch [bz#1694164] +- kvm-virtiofsd-cap-ng-helpers.patch [bz#1694164] +- kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch [bz#1694164] +- kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch [bz#1694164] +- kvm-virtiofsd-fix-libfuse-information-leaks.patch [bz#1694164] +- kvm-virtiofsd-add-syslog-command-line-option.patch [bz#1694164] +- kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch [bz#1694164] +- kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch [bz#1694164] +- kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch [bz#1694164] +- kvm-virtiofsd-Handle-reinit.patch [bz#1694164] +- kvm-virtiofsd-Handle-hard-reboot.patch [bz#1694164] +- kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch [bz#1694164] +- kvm-vhost-user-Print-unexpected-slave-message-types.patch [bz#1694164] +- kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch 
[bz#1694164] +- kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-control-readdirplus.patch [bz#1694164] +- kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch [bz#1694164] +- kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch [bz#1694164] +- kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-use-hashtable.patch [bz#1694164] +- kvm-virtiofsd-Clean-up-inodes-on-destroy.patch [bz#1694164] +- kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch [bz#1694164] +- kvm-virtiofsd-fix-error-handling-in-main.patch [bz#1694164] +- kvm-virtiofsd-cleanup-allocated-resource-in-se.patch [bz#1694164] +- kvm-virtiofsd-fix-memory-leak-on-lo.source.patch [bz#1694164] +- kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch [bz#1694164] +- kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch [bz#1694164] +- kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch [bz#1694164] +- kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch [bz#1694164] +- kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch [bz#1694164] +- kvm-virtiofsd-Support-remote-posix-locks.patch [bz#1694164] +- kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch [bz#1694164] +- kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch [bz#1694164] +- kvm-virtiofsd-make-lo_release-atomic.patch [bz#1694164] +- kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch [bz#1694164] +- kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch [bz#1694164] +- kvm-libvhost-user-Fix-some-memtable-remap-cases.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch [bz#1694164] +- kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch [bz#1694164] +- kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch [bz#1694164] +- 
kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch [bz#1694164] +- kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch [bz#1694164] +- kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch [bz#1694164] +- kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch [bz#1694164] +- kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch [bz#1694164] +- kvm-virtiofsd-process-requests-in-a-thread-pool.patch [bz#1694164] +- kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch [bz#1694164] +- kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch [bz#1694164] +- kvm-virtiofsd-add-thread-pool-size-NUM-option.patch [bz#1694164] +- kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch [bz#1694164] +- kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch [bz#1694164] +- kvm-virtiofsd-add-some-options-to-the-help-message.patch [bz#1694164] +- kvm-redhat-ship-virtiofsd-vhost-user-device-backend.patch [bz#1694164] +- Resolves: bz#1694164 + (virtio-fs: host<->guest shared file system (qemu)) +- Resolves: bz#1725084 + (aarch64: support dumping SVE registers) +- Resolves: bz#1779041 + (netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic) + +* Tue Jan 21 2020 Miroslav Rezanina - 4.2.0-7.el8 +- kvm-tcp_emu-Fix-oob-access.patch [bz#1791568] +- kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch [bz#1791568] +- kvm-slirp-use-correct-size-while-emulating-commands.patch [bz#1791568] +- kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch [bz#1559846] +- Resolves: bz#1559846 + (Nested KVM: limit VMX features according to CPU models - Fast Train) +- Resolves: bz#1791568 + (CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0]) + +* Wed Jan 15 2020 Danilo Cesar Lemes de Paula - 
4.2.0-6.el8 +- kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch [bz#1733893] +- kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch [bz#1782678] +- kvm-virtio-don-t-enable-notifications-during-polling.patch [bz#1789301] +- kvm-usbredir-Prevent-recursion-in-usbredir_write.patch [bz#1790844] +- kvm-xhci-recheck-slot-status.patch [bz#1790844] +- Resolves: bz#1733893 + (Boot a guest with "-prom-env 'auto-boot?=false'", SLOF failed to enter the boot entry after input "boot" followed by "0 > " on VNC) +- Resolves: bz#1782678 + (qemu core dump after hot-unplugging the XXV710/XL710 PF) +- Resolves: bz#1789301 + (virtio-blk/scsi: fix notification suppression during AioContext polling) +- Resolves: bz#1790844 + (USB related fixes) + +* Tue Jan 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-5.el8 +- kvm-i386-Remove-cpu64-rhel6-CPU-model.patch [bz#1741345] +- kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch [bz#1772774] +- Resolves: bz#1741345 + (Remove the "cpu64-rhel6" CPU from qemu-kvm) +- Resolves: bz#1772774 + (qemu-kvm core dump during migration+reboot ( Assertion `mem->dirty_bmap' failed )) + +* Fri Dec 13 2019 Danilo Cesar Lemes de Paula - 4.2.0-4.el8 +- Rebase to qemu-4.2 +- Resolves: bz#1783250 + (rebase qemu-kvm to 4.2) + +* Tue Dec 10 2019 Danilo Cesar Lemes de Paula - 4.1.0-18.el8 +- kvm-LUKS-support-preallocation.patch [bz#1534951] +- kvm-nbd-add-empty-.bdrv_reopen_prepare.patch [bz#1718727] +- kvm-qdev-qbus-add-hidden-device-support.patch [bz#1757796] +- kvm-pci-add-option-for-net-failover.patch [bz#1757796] +- kvm-pci-mark-devices-partially-unplugged.patch [bz#1757796] +- kvm-pci-mark-device-having-guest-unplug-request-pending.patch [bz#1757796] +- kvm-qapi-add-unplug-primary-event.patch [bz#1757796] +- kvm-qapi-add-failover-negotiated-event.patch [bz#1757796] +- kvm-migration-allow-unplug-during-migration-for-failover.patch [bz#1757796] +- kvm-migration-add-new-migration-state-wait-unplug.patch [bz#1757796] +- 
kvm-libqos-tolerate-wait-unplug-migration-state.patch [bz#1757796] +- kvm-net-virtio-add-failover-support.patch [bz#1757796] +- kvm-vfio-unplug-failover-primary-device-before-migration.patch [bz#1757796] +- kvm-net-virtio-fix-dev_unplug_pending.patch [bz#1757796] +- kvm-net-virtio-return-early-when-failover-primary-alread.patch [bz#1757796] +- kvm-net-virtio-fix-re-plugging-of-primary-device.patch [bz#1757796] +- kvm-net-virtio-return-error-when-device_opts-arg-is-NULL.patch [bz#1757796] +- kvm-vfio-don-t-ignore-return-value-of-migrate_add_blocke.patch [bz#1757796] +- kvm-hw-vfio-pci-Fix-double-free-of-migration_blocker.patch [bz#1757796] +- Resolves: bz#1534951 + (RFE: Support preallocation mode for luks format) +- Resolves: bz#1718727 + (Committing changes to the backing file over NBD fails with reopening files not supported) +- Resolves: bz#1757796 + (RFE: support for net failover devices in qemu) + +* Mon Dec 02 2019 Danilo Cesar Lemes de Paula - 4.1.0-17.el8 +- kvm-qemu-pr-helper-fix-crash-in-mpath_reconstruct_sense.patch [bz#1772322] +- Resolves: bz#1772322 + (qemu-pr-helper: fix crash in mpath_reconstruct_sense) + +* Wed Nov 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-16.el8 +- kvm-curl-Keep-pointer-to-the-CURLState-in-CURLSocket.patch [bz#1745209] +- kvm-curl-Keep-socket-until-the-end-of-curl_sock_cb.patch [bz#1745209] +- kvm-curl-Check-completion-in-curl_multi_do.patch [bz#1745209] +- kvm-curl-Pass-CURLSocket-to-curl_multi_do.patch [bz#1745209] +- kvm-curl-Report-only-ready-sockets.patch [bz#1745209] +- kvm-curl-Handle-success-in-multi_check_completion.patch [bz#1745209] +- kvm-curl-Check-curl_multi_add_handle-s-return-code.patch [bz#1745209] +- kvm-vhost-user-save-features-if-the-char-dev-is-closed.patch [bz#1738768] +- kvm-block-snapshot-Restrict-set-of-snapshot-nodes.patch [bz#1658981] +- kvm-iotests-Test-internal-snapshots-with-blockdev.patch [bz#1658981] +- kvm-qapi-Add-feature-flags-to-commands-in-qapi-introspec.patch [bz#1658981] +- 
kvm-qapi-Allow-introspecting-fix-for-savevm-s-cooperatio.patch [bz#1658981] +- kvm-block-Remove-backing-null-from-bs-explicit_-options.patch [bz#1773925] +- kvm-iotests-Test-multiple-blockdev-snapshot-calls.patch [bz#1773925] +- Resolves: bz#1658981 + (qemu failed to create internal snapshot via 'savevm' when using blockdev) +- Resolves: bz#1738768 + (Guest fails to recover receiving packets after vhost-user reconnect) +- Resolves: bz#1745209 + (qemu-img gets stuck when stream-converting from http) +- Resolves: bz#1773925 + (Fail to do blockcommit with more than one snapshots) + +* Thu Nov 14 2019 Danilo Cesar Lemes de Paula - 4.1.0-15.el8 +- kvm-virtio-blk-Add-blk_drain-to-virtio_blk_device_unreal.patch [bz#1706759] +- kvm-Revert-qcow2-skip-writing-zero-buffers-to-empty-COW-.patch [bz#1772473] +- kvm-coroutine-Add-qemu_co_mutex_assert_locked.patch [bz#1772473] +- kvm-qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch [bz#1772473] +- Resolves: bz#1706759 + (qemu core dump when unplug a 16T GPT type disk from win2019 guest) +- Resolves: bz#1772473 + (Import fixes from 8.1.0 into 8.1.1 branch) + +* Tue Oct 29 2019 Danilo Cesar Lemes de Paula - 4.1.0-14.el8 +- kvm-Revert-qcow2-skip-writing-zero-buffers-to-empty-COW-.patch [bz#1751934] +- kvm-coroutine-Add-qemu_co_mutex_assert_locked.patch [bz#1764721] +- kvm-qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch [bz#1764721] +- Resolves: bz#1751934 + (Fail to install guest when xfs is the host filesystem) +- Resolves: bz#1764721 + (qcow2 image corruption due to incorrect locking in preallocation detection) + +* Fri Sep 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-13.el8 +- kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch [bz#1748253] +- kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch [bz#1744955] +- Resolves: bz#1744955 + (Qemu hang when block resize a qcow2 image) +- Resolves: bz#1748253 + (QEMU crashes (core dump) when using the integrated NBD server with data-plane) + +* Thu 
Sep 26 2019 Danilo Cesar Lemes de Paula - 4.1.0-12.el8 +- kvm-block-Use-QEMU_IS_ALIGNED.patch [bz#1745922] +- kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch [bz#1745922] +- kvm-block-qcow2-refactor-encryption-code.patch [bz#1745922] +- kvm-qemu-iotests-Add-test-for-bz-1745922.patch [bz#1745922] +- Resolves: bz#1745922 + (Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase') + +* Mon Sep 23 2019 Danilo Cesar Lemes de Paula - 4.1.0-11.el8 +- kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch [bz#1746631] +- kvm-hostmem-file-fix-pmem-file-size-check.patch [bz#1724008 bz#1736788] +- kvm-memory-fetch-pmem-size-in-get_file_size.patch [bz#1724008 bz#1736788] +- kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch [bz#1753992] +- Resolves: bz#1724008 + (QEMU core dumped "memory_region_get_ram_ptr: Assertion `mr->ram_block' failed") +- Resolves: bz#1736788 + (QEMU core dumped if boot guest with nvdimm backed by /dev/dax0.0 and option pmem=off) +- Resolves: bz#1746631 + (Qemu core dump when do block commit under stress) +- Resolves: bz#1753992 + (core dump when testing persistent reservation in guest) + +* Mon Sep 16 2019 Danilo Cesar Lemes de Paula - 4.1.0-10.el8 +- kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch [bz#1748725] +- kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch [bz#1746267] +- kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch [bz#1717321] +- kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch [bz#1749737] +- Resolves: bz#1717321 + (qemu-kvm core dumped when repeat "system_reset" multiple times during guest boot) +- Resolves: bz#1746267 + (qemu coredump: qemu-kvm: block/create.c:68: qmp_blockdev_create: Assertion `drv' failed) +- Resolves: bz#1748725 + ([ppc][migration][v6.3-rc1-p1ce8930]basic migration failed with "qemu-kvm: KVM_SET_DEVICE_ATTR failed: Group 3 attr 0x0000000000001309: Device or resource busy") +- Resolves: bz#1749737 + (CVE-2019-15890 qemu-kvm: QEMU: Slirp: 
use-after-free during packet reassembly [rhel-av-8]) + +* Tue Sep 10 2019 Danilo Cesar Lemes de Paula - 4.1.0-9.el8 +- kvm-migration-always-initialise-ram_counters-for-a-new-m.patch [bz#1734316] +- kvm-migration-add-qemu_file_update_transfer-interface.patch [bz#1734316] +- kvm-migration-add-speed-limit-for-multifd-migration.patch [bz#1734316] +- kvm-migration-update-ram_counters-for-multifd-sync-packe.patch [bz#1734316] +- kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch [bz#1750200] +- kvm-spapr-pci-Free-MSIs-during-reset.patch [bz#1750200] +- Resolves: bz#1734316 + (multifd migration does not honour speed limits, consumes entire bandwidth of NIC) +- Resolves: bz#1750200 + ([RHEL8.1][QEMU4.1]boot up guest with vf device,then system_reset guest,error prompt(qemu-kvm: Can't allocate MSIs for device 2800: IRQ 4904 is not free)) + +* Mon Sep 09 2019 Danilo Cesar Lemes de Paula - 4.1.0-8.el8 +- kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch [bz#1747836] +- kvm-ehci-fix-queue-dev-null-ptr-dereference.patch [bz#1746790] +- kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch [bz#1743477] +- kvm-file-posix-Handle-undetectable-alignment.patch [bz#1749134] +- kvm-block-posix-Always-allocate-the-first-block.patch [bz#1749134] +- kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch [bz#1749134] +- Resolves: bz#1743477 + (Since bd94bc06479a "spapr: change default interrupt mode to 'dual'", QEMU resets the machine to select the appropriate interrupt controller. And -no-reboot prevents that.) 
+- Resolves: bz#1746790 + (qemu core dump while migrate from RHEL7.6 to RHEL8.1) +- Resolves: bz#1747836 + (Call traces after guest migration due to incorrect handling of the timebase) +- Resolves: bz#1749134 + (I/O error when virtio-blk disk is backed by a raw image on 4k disk) + +* Fri Sep 06 2019 Danilo Cesar Lemes de Paula - 4.1.0-7.el8 +- kvm-trace-Clarify-DTrace-SystemTap-help-message.patch [bz#1516220] +- kvm-socket-Add-backlog-parameter-to-socket_listen.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch [bz#1726898] +- kvm-multifd-Use-number-of-channels-as-listen-backlog.patch [bz#1726898] +- kvm-pseries-Fix-compat_pvr-on-reset.patch [bz#1744107] +- kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch [bz#1744107] +- Resolves: bz#1516220 + (-trace help prints an incomplete list of trace events) +- Resolves: bz#1726898 + (Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then) +- Resolves: bz#1744107 + (Migration from P8(qemu4.1) to P9(qemu4.1), after migration, qemu crash on destination with error message "qemu-kvm: error while loading state for instance 0x1 of device 'cpu'") + +* Wed Sep 04 2019 Danilo Cesar Lemes de Paula - 4.1.0-6.el8 +- kvm-memory-Refactor-memory_region_clear_coalescing.patch [bz#1743142] +- kvm-memory-Split-zones-when-do-coalesced_io_del.patch [bz#1743142] +- kvm-memory-Remove-has_coalesced_range-counter.patch [bz#1743142] +- kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch [bz#1743142] +- kvm-enable-virgl-for-real-this-time.patch [bz#1559740] +- Resolves: bz#1559740 + ([RFE] Enable virgl as TechPreview (qemu)) +- Resolves: bz#1743142 + (Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device 
(28)) + +* Tue Aug 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-5.el8 +- kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch [bz#1693772] +- kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch [bz#1693772] +- kvm-enable-virgl.patch [bz#1559740] +- kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch [bz#1744170] +- kvm-Do-not-run-iotests-on-brew-build.patch [bz#1742197 bz#1742819] +- Resolves: bz#1559740 + ([RFE] Enable virgl as TechPreview (qemu)) +- Resolves: bz#1693772 + ([IBM zKVM] RHEL AV 8.1.0 machine type update for s390x) +- Resolves: bz#1742197 + (Remove iotests from qemu-kvm builds [RHEL AV 8.1.0]) +- Resolves: bz#1742819 + (Remove iotests from qemu-kvm builds [RHEL 8.1.0]) +- Resolves: bz#1744170 + ([IBM Power] New 8.1.0 machine type for pseries) + +* Tue Aug 20 2019 Danilo Cesar Lemes de Paula - 4.1.0-4.el8 +- kvm-RHEL-disable-hostmem-memfd.patch [bz#1738626 bz#1740797] +- Resolves: bz#1738626 + (Disable memfd in QEMU) +- Resolves: bz#1740797 + (Disable memfd in QEMU) + +* Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 4.1.0-3.el8 +- kvm-x86-machine-types-pc_rhel_8_0_compat.patch [bz#1719649] +- kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch [bz#1719649] +- kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch [bz#1719649] +- kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch [bz#1719649] +- kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch [bz#1719649] +- kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch [bz#1719649] +- Resolves: bz#1719649 + (8.1 machine type for x86) + +* Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 4.1.0-2.el8 +- kvm-spec-Update-seavgabios-dependency.patch [bz#1725664] +- kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch [bz#1741451] +- kvm-display-bochs-fix-pcie-support.patch [bz#1733977 bz#1740692] +- kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch [bz#1733977] +- kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch [bz#1733977] +- 
kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch [bz#1733977 bz#1740692] +- kvm-Update-version-for-v4.1.0-release.patch [bz#1733977 bz#1740692] +- Resolves: bz#1725664 + (Update seabios dependency) +- Resolves: bz#1733977 + (Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed) +- Resolves: bz#1740692 + (Backport QEMU 4.1.0 rc5 & ga patches) +- Resolves: bz#1741451 + (Failed to hot-plug vcpus) + +* Wed Aug 14 2019 Miroslav Rezanina - 4.1.0-1.el8 +- Rebase to qemu 4.1.0 rc4 [bz#1705235] +- Resolves: bz#1705235 + (Rebase qemu-kvm for RHEL-AV 8.1.0) + +* Tue Jul 23 2019 Danilo Cesar Lemes de Paula - 4.0.0-6.el8 +- kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch [bz#1519013] +- kvm-x86_64-rh-devices-enable-TPM-emulation.patch [bz#1519013] +- kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch [bz#1719823] +- Resolves: bz#1519013 + ([RFE] QEMU Software TPM support (vTPM, or TPM emulation)) +- Resolves: bz#1719823 + ([RHEL 8.1] [RFE] increase the maximum of vfio devices to more than 32 in qemu-kvm) + +* Mon Jul 08 2019 Miroslav Rezanina - 4.0.0-5.el8 +- kvm-qemu-kvm.spec-bump-libseccomp-2.4.0.patch [bz#1720306] +- kvm-qxl-check-release-info-object.patch [bz#1712717] +- kvm-target-i386-add-MDS-NO-feature.patch [bz#1722839] +- kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch [bz#1588356] +- kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch [bz#1588356] +- kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch [bz#1707118] +- Resolves: bz#1588356 + (qemu crashed on the source host when do storage migration with source qcow2 disk created by 'qemu-img') +- Resolves: bz#1707118 + (enable device: bochs-display (QEMU)) +- Resolves: bz#1712717 + (CVE-2019-12155 qemu-kvm: QEMU: qxl: null pointer dereference while releasing spice resources [rhel-av-8]) +- Resolves: bz#1720306 + (VM failed to start with error "failed to install seccomp syscall filter in the kernel") +- Resolves: 
bz#1722839 + ([Intel 8.1 FEAT] MDS_NO exposure to guest - Fast Train) + +* Tue Jun 11 2019 Danilo Cesar Lemes de Paula - 4.0.0-4.el8 +- kvm-Disable-VXHS-support.patch [bz#1714937] +- kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch [bz#1713735] +- kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch [bz#1713735] +- kvm-usb-call-reset-handler-before-updating-state.patch [bz#1713679] +- kvm-usb-host-skip-reset-for-untouched-devices.patch [bz#1713679] +- kvm-usb-host-avoid-libusb_set_configuration-calls.patch [bz#1713679] +- kvm-aarch64-Compile-out-IOH3420.patch [bz#1627283] +- kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch [bz#1714891] +- kvm-vl-Document-why-objects-are-delayed.patch [bz#1714891] +- Resolves: bz#1627283 + (Compile out IOH3420 on aarch64) +- Resolves: bz#1713679 + (Detached device when trying to upgrade USB device firmware when in doing USB Passthrough via QEMU) +- Resolves: bz#1713735 + (Allow ARM VIRT iommu option in RHEL8.1 machine) +- Resolves: bz#1714891 + (Guest with persistent reservation manager for a disk fails to start) +- Resolves: bz#1714937 + (Disable VXHS support) + +* Tue May 28 2019 Danilo Cesar Lemes de Paula - 4.0.0-3.el8 +- kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch [bz#1709726] +- kvm-compat-Generic-hw_compat_rhel_8_0.patch [bz#1709726] +- kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch [bz#1709726] +- kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch [bz#1709726] +- Resolves: bz#1709726 + (Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'") + +* Sat May 25 2019 Danilo Cesar Lemes de Paula - 4.0.0-2.el8 +- kvm-target-i386-define-md-clear-bit.patch [bz#1703297 bz#1703304 bz#1703310 bz#1707274] +- Resolves: bz#1703297 + (CVE-2018-12126 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Store Buffer Data Sampling (MSBDS) [rhel-av-8]) +- Resolves: bz#1703304 + (CVE-2018-12130 virt:8.0.0/qemu-kvm: 
hardware: Microarchitectural Fill Buffer Data Sampling (MFBDS) [rhel-av-8]) +- Resolves: bz#1703310 + (CVE-2018-12127 virt:8.0.0/qemu-kvm: hardware: Micro-architectural Load Port Data Sampling - Information Leak (MLPDS) [rhel-av-8]) +- Resolves: bz#1707274 + (CVE-2019-11091 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Data Sampling Uncacheable Memory (MDSUM) [rhel-av-8.1.0]) + +* Wed May 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-26.el8 +- kvm-target-ppc-spapr-Add-SPAPR_CAP_LARGE_DECREMENTER.patch [bz#1698711] +- kvm-target-ppc-spapr-Add-workaround-option-to-SPAPR_CAP_.patch [bz#1698711] +- kvm-target-ppc-spapr-Add-SPAPR_CAP_CCF_ASSIST.patch [bz#1698711] +- kvm-target-ppc-tcg-make-spapr_caps-apply-cap-cfpc-sbbc-i.patch [bz#1698711] +- kvm-target-ppc-spapr-Enable-mitigations-by-default-for-p.patch [bz#1698711] +- kvm-slirp-ensure-there-is-enough-space-in-mbuf-to-null-t.patch [bz#1693076] +- kvm-slirp-don-t-manipulate-so_rcv-in-tcp_emu.patch [bz#1693076] +- Resolves: bz#1693076 + (CVE-2019-6778 qemu-kvm: QEMU: slirp: heap buffer overflow in tcp_emu() [rhel-av-8]) +- Resolves: bz#1698711 + (Enable Spectre / Meltdown mitigations by default in pseries-rhel8.0.0 machine type) + +* Mon May 06 2019 Danilo Cesar Lemes de Paula - 3.1.0-25.el8 +- kvm-redhat-enable-tpmdev-passthrough.patch [bz#1688312] +- kvm-exec-Only-count-mapped-memory-backends-for-qemu_getr.patch [bz#1680492] +- kvm-Enable-libpmem-to-support-nvdimm.patch [bz#1705149] +- Resolves: bz#1680492 + (Qemu quits suddenly while system_reset after hot-plugging unsupported memory by compatible guest on P9 with 1G huge page set) +- Resolves: bz#1688312 + ([RFE] enable TPM passthrough at compile time (qemu-kvm)) +- Resolves: bz#1705149 + (libpmem support is not enabled in qemu-kvm) + +* Fri Apr 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-24.el8 +- kvm-x86-host-phys-bits-limit-option.patch [bz#1688915] +- kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch [bz#1688915] +- Resolves: bz#1688915 + ([Intel 
8.0 Alpha] physical bits should <= 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters.) + +* Tue Apr 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-23.el8 +- kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch [bz#1693173] +- Resolves: bz#1693173 + (CVE-2018-20815 qemu-kvm: QEMU: device_tree: heap buffer overflow while loading device tree blob [rhel-av-8]) + +* Mon Apr 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-22.el8 +- kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch [bz#1687578] +- kvm-i386-Make-arch_capabilities-migratable.patch [bz#1687578] +- Resolves: bz#1687578 + (Incorrect CVE vulnerabilities reported on Cascade Lake cpus) + +* Thu Apr 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-21.el8 +- kvm-Remove-7-qcow2-and-luks-iotests-that-are-taking-25-s.patch [bz#1683473] +- kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch [bz#1674438] +- kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch [bz#1655065] +- kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch [bz#1655065] +- kvm-vnc-detect-and-optimize-pageflips.patch [bz#1666206] +- kvm-Load-kvm-module-during-boot.patch [bz#1676907 bz#1685995] +- kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch [bz#1669053] +- kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch [bz#1687582] +- kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch [bz#1652572] +- Resolves: bz#1652572 + (QEMU core dumped if stop nfs service during migration) +- Resolves: bz#1655065 + ([rhel.8.0][fast train]'qemu-img measure' size does not match the real allocated size for luks-inside-qcow2 image) +- Resolves: bz#1666206 + (vnc server should detect page-flips and avoid sending fullscreen updates then.) 
+- Resolves: bz#1669053 + (Guest call trace when boot with nvdimm device backed by /dev/dax) +- Resolves: bz#1674438 + (RHEL8.0 - Guest reboot fails after memory hotplug multiple times (kvm)) +- Resolves: bz#1676907 + (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) +- Resolves: bz#1683473 + (Remove 7 qcow2 & luks iotests from rhel8 fast train build %check phase) +- Resolves: bz#1685995 + (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) +- Resolves: bz#1687582 + (QEMU IOTEST 200 fails with 'virtio-scsi-pci is not a valid device model name') + +* Fri Mar 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-20.el8 +- kvm-i386-Add-stibp-flag-name.patch [bz#1686260] +- Resolves: bz#1686260 + (stibp is missing on qemu 3.0 and qemu 3.1) + +* Fri Mar 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-19.el8 +- kvm-migration-Fix-cancel-state.patch [bz#1608649] +- kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch [bz#1608649] +- Resolves: bz#1608649 + (Query-migrate get "failed" status after migrate-cancel) + +* Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-18.el8 +- kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch [bz#1661030] +- kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch [bz#1661515] +- kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch [bz#1661515] +- kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch [bz#1661515] +- Resolves: bz#1661030 + (Remove MPX support from 8.0 machine types) +- Resolves: bz#1661515 + (Remove PCONFIG and INTEL_PT from Icelake-* CPU models) + +* Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-17.el8 +- kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch [bz#1678968] +- Resolves: bz#1678968 + (-blockdev: auto-read-only is ineffective for drivers on read-only whitelist) + +* Mon Feb 25 2019 Danilo Cesar Lemes de Paula - 3.1.0-16.el8 +- 
kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch [bz#1664997] +- kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch [bz#1664997] +- Resolves: bz#1664997 + (Restrict floppy device to RHEL-7 machine types) + +* Wed Feb 13 2019 Danilo Cesar Lemes de Paula - 3.1.0-15.el8 +- kvm-Add-raw-qcow2-nbd-and-luks-iotests-to-run-during-the.patch [bz#1664855] +- kvm-Introduce-the-qemu-kvm-tests-rpm.patch [bz#1669924] +- Resolves: bz#1664855 + (Run iotests in qemu-kvm build %check phase) +- Resolves: bz#1669924 + (qemu-kvm packaging: Package the avocado_qemu tests and qemu-iotests in a new rpm) + +* Tue Feb 12 2019 Danilo Cesar Lemes de Paula - 3.1.0-14.el8 +- kvm-doc-fix-the-configuration-path.patch [bz#1644985] +- Resolves: bz#1644985 + (The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train) + +* Mon Feb 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-13.el8 +- kvm-Acceptance-tests-add-Linux-initrd-checking-test.patch [bz#1669922] +- kvm-mmap-alloc-unfold-qemu_ram_mmap.patch [bz#1671519] +- kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch [bz#1671519] +- kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch [bz#1653590] +- kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch [bz#1673014] +- kvm-virtio-scsi-Move-BlockBackend-back-to-the-main-AioCo.patch [bz#1656276 bz#1662508] +- kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch [bz#1656276 bz#1662508] +- kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch [bz#1656276 bz#1662508] +- Resolves: bz#1653590 + ([Fast train]had better stop qemu immediately while guest was making use of an improper page size) +- Resolves: bz#1656276 + (qemu-kvm core dumped after hotplug the deleted disk with iothread parameter) +- Resolves: bz#1662508 + (Qemu core dump when start guest with two disks using same drive) +- Resolves: bz#1669922 + (Backport avocado-qemu tests for QEMU 3.1) +- Resolves: bz#1671519 + (RHEL8.0 Snapshot3 
- qemu doesn't free up hugepage memory when hotplug/hotunplug using memory-backend-file (qemu-kvm)) +- Resolves: bz#1673014 + (Local VM and migrated VM on the same host can run with same RAW file as visual disk source while without shareable configured or lock manager enabled) + +* Fri Feb 08 2019 Danilo Cesar Lemes de Paula - 3.1.0-12.el8 +- kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch [bz#1665896] +- kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch [bz#1668248] +- kvm-scsi-disk-Add-device_id-property.patch [bz#1668248] +- Resolves: bz#1665896 + (VNC unix listener socket is deleted after first client quits) +- Resolves: bz#1668248 + ("An unknown error has occurred" when using cdrom to install the system with two blockdev disks.(when choose installation destination)) + +* Thu Jan 31 2019 Danilo Cesar Lemes de Paula - 3.1.0-11.el8 +- kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch [bz#1644985] +- kvm-json-Fix-handling-when-not-interpolating.patch [bz#1668244] +- Resolves: bz#1644985 + (The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train) +- Resolves: bz#1668244 + (qemu-img: /var/tmp/v2vovl9951f8.qcow2: CURL: Error opening file: The requested URL returned error: 404 Not Found) + +* Tue Jan 29 2019 Danilo Cesar Lemes de Paula - 3.1.0-10.el8 +- kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch [bz#1655947] +- kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch [bz#1655947] +- Resolves: bz#1655947 + (qemu-kvm core dumped after unplug the device which was set io throttling parameters) + +* Tue Jan 29 2019 Danilo Cesar Lemes de Paula - 3.1.0-9.el8 +- kvm-migration-rdma-unregister-fd-handler.patch [bz#1666601] +- kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch [bz#1659127] +- kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch [bz#1659127] +- Resolves: bz#1659127 + (Stress guest and stop it, then do live migration, guest hit call trace on destination end) +- Resolves: 
bz#1666601 + ([q35] dst qemu core dumped when do rdma migration with Mellanox IB QDR card) + +* Thu Jan 24 2019 Danilo Cesar Lemes de Paula - 3.1.0-7.el8 +- kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch [bz#1653511] +- kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch [bz#1653511] +- Resolves: bz#1653511 + (qemu doesn't report all support cpu features which cause libvirt cannot get the support status of hv_tlbflush) + +* Wed Jan 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-6.el8 +- kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch [bz#1653114] +- kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch [bz#1668205] +- Resolves: bz#1653114 + (Incorrect NUMA nodes passed to qemu-kvm guest in ibm,max-associativity-domains property) +- Resolves: bz#1668205 + (Guest quit with error when hotunplug cpu) + +* Mon Jan 21 2019 Danilo Cesar Lemes de Paula - 3.1.0-5.el8 +- kvm-virtio-Helper-for-registering-virtio-device-types.patch [bz#1648023] +- kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch [bz#1648023] +- kvm-globals-Allow-global-properties-to-be-optional.patch [bz#1648023] +- kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch [bz#1648023] +- kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch [bz#1656504] +- kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch [bz#1656504] +- kvm-aarch64-Use-256MB-ECAM-region-by-default.patch [bz#1656504] +- Resolves: bz#1648023 + (Provide separate device types for transitional virtio PCI devices - Fast Train) +- Resolves: bz#1656504 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (aarch64)) + +* Fri Jan 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-4.el8 +- kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch [bz#1656510] +- kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch [bz#1661967] +- kvm-redhat-Fixing-.gitpublish-to-include-AV-information.patch [] +- Resolves: bz#1656510 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (s390x)) +- 
Resolves: bz#1661967 + (Kernel prints the message "VPHN is not supported. Disabling polling...") + +* Thu Jan 03 2019 Danilo Cesar Lemes de Paula - 3.1.0-3.el8 +- kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch [bz#1656508] +- Resolves: bz#1656508 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (ppc64le)) + +* Fri Dec 21 2018 Danilo Cesar Lemes de Paula - 3.1.0-2.el8 +- kvm-pc-7.5-compat-entries.patch [bz#1655820] +- kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch [bz#1655820] +- kvm-pc-PC_RHEL7_6_COMPAT.patch [bz#1655820] +- kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch [bz#1655820] +- kvm-pc-Add-pc-q35-8.0.0-machine-type.patch [bz#1655820] +- kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch [bz#1655820] +- kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch [bz#1659604] +- kvm-Add-edk2-Requires-to-qemu-kvm.patch [bz#1660208] +- Resolves: bz#1655820 + (Can't migrate between rhel8 and rhel7 when guest has device "video") +- Resolves: bz#1659604 + (8->7 migration failed: qemu-kvm: error: failed to set MSR 0x4b564d02 to 0x27fc13285) +- Resolves: bz#1660208 + (qemu-kvm: Should depend on the architecture-appropriate guest firmware) + +* Thu Dec 13 2018 Danilo Cesar Lemes de Paula - 3.1.0-1.el8 +- Rebase to qemu-kvm 3.1.0 + +* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-47 +- kvm-Disable-CONFIG_IPMI-and-CONFIG_I2C-for-ppc64.patch [bz#1640044] +- kvm-Disable-CONFIG_CAN_BUS-and-CONFIG_CAN_SJA1000.patch [bz#1640042] +- Resolves: bz#1640042 + (Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 config switches) +- Resolves: bz#1640044 + (Disable CONFIG_I2C and CONFIG_IPMI in default-configs/ppc64-softmmu.mak) + +* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-46 +- kvm-qcow2-Give-the-refcount-cache-the-minimum-possible-s.patch [bz#1656507] +- kvm-docs-Document-the-new-default-sizes-of-the-qcow2-cac.patch [bz#1656507] +- kvm-qcow2-Fix-Coverity-warning-when-calculating-the-refc.patch [bz#1656507] +-
kvm-include-Add-IEC-binary-prefixes-in-qemu-units.h.patch [bz#1656507] +- kvm-qcow2-Options-documentation-fixes.patch [bz#1656507] +- kvm-include-Add-a-lookup-table-of-sizes.patch [bz#1656507] +- kvm-qcow2-Make-sizes-more-humanly-readable.patch [bz#1656507] +- kvm-qcow2-Avoid-duplication-in-setting-the-refcount-cach.patch [bz#1656507] +- kvm-qcow2-Assign-the-L2-cache-relatively-to-the-image-si.patch [bz#1656507] +- kvm-qcow2-Increase-the-default-upper-limit-on-the-L2-cac.patch [bz#1656507] +- kvm-qcow2-Resize-the-cache-upon-image-resizing.patch [bz#1656507] +- kvm-qcow2-Set-the-default-cache-clean-interval-to-10-min.patch [bz#1656507] +- kvm-qcow2-Explicit-number-replaced-by-a-constant.patch [bz#1656507] +- kvm-block-backend-Set-werror-rerror-defaults-in-blk_new.patch [bz#1657637] +- kvm-qcow2-Fix-cache-clean-interval-documentation.patch [bz#1656507] +- Resolves: bz#1656507 + ([RHEL.8] qcow2 cache is too small) +- Resolves: bz#1657637 + (Wrong werror default for -device drive=) + +* Thu Dec 06 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-45 +- kvm-target-ppc-add-basic-support-for-PTCR-on-POWER9.patch [bz#1639069] +- kvm-linux-headers-Update-for-nested-KVM-HV-downstream-on.patch [bz#1639069] +- kvm-target-ppc-Add-one-reg-id-for-ptcr.patch [bz#1639069] +- kvm-ppc-spapr_caps-Add-SPAPR_CAP_NESTED_KVM_HV.patch [bz#1639069] +- kvm-Re-enable-CONFIG_HYPERV_TESTDEV.patch [bz#1651195] +- kvm-qxl-use-guest_monitor_config-for-local-renderer.patch [bz#1610163] +- kvm-Declare-cirrus-vga-as-deprecated.patch [bz#1651994] +- kvm-Do-not-build-bluetooth-support.patch [bz#1654651] +- kvm-vfio-helpers-Fix-qemu_vfio_open_pci-crash.patch [bz#1645840] +- kvm-balloon-Allow-multiple-inhibit-users.patch [bz#1650272] +- kvm-Use-inhibit-to-prevent-ballooning-without-synchr.patch [bz#1650272] +- kvm-vfio-Inhibit-ballooning-based-on-group-attachment-to.patch [bz#1650272] +- kvm-vfio-ccw-pci-Allow-devices-to-opt-in-for-ballooning.patch [bz#1650272] +- 
kvm-vfio-pci-Handle-subsystem-realpath-returning-NULL.patch [bz#1650272] +- kvm-vfio-pci-Fix-failure-to-close-file-descriptor-on-err.patch [bz#1650272] +- kvm-postcopy-Synchronize-usage-of-the-balloon-inhibitor.patch [bz#1650272] +- Resolves: bz#1610163 + (guest shows border blurred screen with some resolutions when qemu boot with -device qxl-vga ,and guest on rhel7.6 has no such question) +- Resolves: bz#1639069 + ([IBM 8.0 FEAT] POWER9 - Nested virtualization in RHEL8.0 KVM for ppc64le - qemu-kvm side) +- Resolves: bz#1645840 + (Qemu core dump when hotplug nvme:// drive via -blockdev) +- Resolves: bz#1650272 + (Ballooning is incompatible with vfio assigned devices, but not prevented) +- Resolves: bz#1651195 + (Re-enable hyperv-testdev device) +- Resolves: bz#1651994 + (Declare the "Cirrus VGA" device emulation of QEMU as deprecated in RHEL8) +- Resolves: bz#1654651 + (Qemu: hw: bt: keep bt/* objects from building [rhel-8.0]) + +* Tue Nov 27 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-44 +- kvm-block-Make-more-block-drivers-compile-time-configura.patch [bz#1598842] +- kvm-RHEL8-Add-disable-configure-options-to-qemu-spec-fil.patch [bz#1598842] +- Resolves: bz#1598842 + (Compile out unused block drivers) + +* Mon Nov 26 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-43 + +- kvm-configure-add-test-for-libudev.patch [bz#1636185] +- kvm-qga-linux-report-disk-serial-number.patch [bz#1636185] +- kvm-qga-linux-return-disk-device-in-guest-get-fsinfo.patch [bz#1636185] +- kvm-qemu-error-introduce-error-warn-_report_once.patch [bz#1625173] +- kvm-intel-iommu-start-to-use-error_report_once.patch [bz#1625173] +- kvm-intel-iommu-replace-more-vtd_err_-traces.patch [bz#1625173] +- kvm-intel_iommu-introduce-vtd_reset_caches.patch [bz#1625173] +- kvm-intel_iommu-better-handling-of-dmar-state-switch.patch [bz#1625173] +- kvm-intel_iommu-move-ce-fetching-out-when-sync-shadow.patch [bz#1625173 bz#1629616] +- kvm-intel_iommu-handle-invalid-ce-for-shadow-sync.patch
[bz#1625173 bz#1629616] +- kvm-block-remove-bdrv_dirty_bitmap_make_anon.patch [bz#1518989] +- kvm-block-simplify-code-around-releasing-bitmaps.patch [bz#1518989] +- kvm-hbitmap-Add-advance-param-to-hbitmap_iter_next.patch [bz#1518989] +- kvm-test-hbitmap-Add-non-advancing-iter_next-tests.patch [bz#1518989] +- kvm-block-dirty-bitmap-Add-bdrv_dirty_iter_next_area.patch [bz#1518989] +- kvm-blockdev-backup-add-bitmap-argument.patch [bz#1518989] +- kvm-dirty-bitmap-switch-assert-fails-to-errors-in-bdrv_m.patch [bz#1518989] +- kvm-dirty-bitmap-rename-bdrv_undo_clear_dirty_bitmap.patch [bz#1518989] +- kvm-dirty-bitmap-make-it-possible-to-restore-bitmap-afte.patch [bz#1518989] +- kvm-blockdev-rename-block-dirty-bitmap-clear-transaction.patch [bz#1518989] +- kvm-qapi-add-transaction-support-for-x-block-dirty-bitma.patch [bz#1518989] +- kvm-block-dirty-bitmaps-add-user_locked-status-checker.patch [bz#1518989] +- kvm-block-dirty-bitmaps-fix-merge-permissions.patch [bz#1518989] +- kvm-block-dirty-bitmaps-allow-clear-on-disabled-bitmaps.patch [bz#1518989] +- kvm-block-dirty-bitmaps-prohibit-enable-disable-on-locke.patch [bz#1518989] +- kvm-block-backup-prohibit-backup-from-using-in-use-bitma.patch [bz#1518989] +- kvm-nbd-forbid-use-of-frozen-bitmaps.patch [bz#1518989] +- kvm-bitmap-Update-count-after-a-merge.patch [bz#1518989] +- kvm-iotests-169-drop-deprecated-autoload-parameter.patch [bz#1518989] +- kvm-block-qcow2-improve-error-message-in-qcow2_inactivat.patch [bz#1518989] +- kvm-bloc-qcow2-drop-dirty_bitmaps_loaded-state-variable.patch [bz#1518989] +- kvm-dirty-bitmaps-clean-up-bitmaps-loading-and-migration.patch [bz#1518989] +- kvm-iotests-improve-169.patch [bz#1518989] +- kvm-iotests-169-add-cases-for-source-vm-resuming.patch [bz#1518989] +- kvm-pc-dimm-turn-alignment-assert-into-check.patch [bz#1630116] +- Resolves: bz#1518989 + (RFE: QEMU Incremental live backup) +- Resolves: bz#1625173 + ([NVMe Device Assignment] Guest could not boot up with q35+iommu) +- Resolves: 
bz#1629616 + (boot guest with q35+vIOMMU+ device assignment, qemu terminal shows "qemu-kvm: VFIO_UNMAP_DMA: -22" when return assigned network devices from vfio driver to ixgbe in guest) +- Resolves: bz#1630116 + (pc_dimm_get_free_addr: assertion failed: (QEMU_ALIGN_UP(address_space_start, align) == address_space_start)) +- Resolves: bz#1636185 + ([RFE] Report disk device name and serial number (qemu-guest-agent on Linux)) + +* Mon Nov 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-42.el8 +- kvm-luks-Allow-share-rw-on.patch [bz#1629701] +- kvm-redhat-reenable-gluster-support.patch [bz#1599340] +- kvm-redhat-bump-libusb-requirement.patch [bz#1627970] +- Resolves: bz#1599340 + (Reenable glusterfs in qemu-kvm once BZ#1567292 gets fixed) +- Resolves: bz#1627970 + (symbol lookup error: /usr/libexec/qemu-kvm: undefined symbol: libusb_set_option) +- Resolves: bz#1629701 + ("share-rw=on" does not work for luks format image - Fast Train) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-41.el8 +- kvm-block-rbd-pull-out-qemu_rbd_convert_options.patch [bz#1635585] +- kvm-block-rbd-Attempt-to-parse-legacy-filenames.patch [bz#1635585] +- kvm-block-rbd-add-deprecation-documentation-for-filename.patch [bz#1635585] +- kvm-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch [bz#1635585] +- Resolves: bz#1635585 + (rbd json format of 7.6 is incompatible with 7.5) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-40.el8 + +- kvm-vnc-call-sasl_server_init-only-when-required.patch [bz#1609327] +- kvm-nbd-server-fix-NBD_CMD_CACHE.patch [bz#1636142] +- kvm-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch [bz#1636142] +- kvm-test-bdrv-drain-bdrv_drain-works-with-cross-AioConte.patch [bz#1637976] +- kvm-block-Use-bdrv_do_drain_begin-end-in-bdrv_drain_all.patch [bz#1637976] +- kvm-block-Remove-recursive-parameter-from-bdrv_drain_inv.patch [bz#1637976] +- kvm-block-Don-t-manually-poll-in-bdrv_drain_all.patch [bz#1637976] +- 
kvm-tests-test-bdrv-drain-bdrv_drain_all-works-in-corout.patch [bz#1637976] +- kvm-block-Avoid-unnecessary-aio_poll-in-AIO_WAIT_WHILE.patch [bz#1637976] +- kvm-block-Really-pause-block-jobs-on-drain.patch [bz#1637976] +- kvm-block-Remove-bdrv_drain_recurse.patch [bz#1637976] +- kvm-test-bdrv-drain-Add-test-for-node-deletion.patch [bz#1637976] +- kvm-block-Drain-recursively-with-a-single-BDRV_POLL_WHIL.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-node-deletion-in-subtree-recurs.patch [bz#1637976] +- kvm-block-Don-t-poll-in-parent-drain-callbacks.patch [bz#1637976] +- kvm-test-bdrv-drain-Graph-change-through-parent-callback.patch [bz#1637976] +- kvm-block-Defer-.bdrv_drain_begin-callback-to-polling-ph.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-that-bdrv_drain_invoke-doesn-t-.patch [bz#1637976] +- kvm-block-Allow-AIO_WAIT_WHILE-with-NULL-ctx.patch [bz#1637976] +- kvm-block-Move-bdrv_drain_all_begin-out-of-coroutine-con.patch [bz#1637976] +- kvm-block-ignore_bds_parents-parameter-for-drain-functio.patch [bz#1637976] +- kvm-block-Allow-graph-changes-in-bdrv_drain_all_begin-en.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-graph-changes-in-drain_all-sect.patch [bz#1637976] +- kvm-block-Poll-after-drain-on-attaching-a-node.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-bdrv_append-to-drained-node.patch [bz#1637976] +- kvm-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch [bz#1637976] +- kvm-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch [bz#1637976] +- kvm-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch [bz#1637976] +- kvm-job-Fix-missing-locking-due-to-mismerge.patch [bz#1637976] +- kvm-blockjob-Wake-up-BDS-when-job-becomes-idle.patch [bz#1637976] +- kvm-aio-wait-Increase-num_waiters-even-in-home-thread.patch [bz#1637976] +- kvm-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch [bz#1637976] +- kvm-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch [bz#1637976] +- kvm-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch 
[bz#1637976] +- kvm-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch [bz#1637976] +- kvm-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch [bz#1637976] +- kvm-block-backend-Add-.drained_poll-callback.patch [bz#1637976] +- kvm-block-backend-Fix-potential-double-blk_delete.patch [bz#1637976] +- kvm-block-backend-Decrease-in_flight-only-after-callback.patch [bz#1637976] +- kvm-blockjob-Lie-better-in-child_job_drained_poll.patch [bz#1637976] +- kvm-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch [bz#1637976] +- kvm-job-Avoid-deadlocks-in-job_completed_txn_abort.patch [bz#1637976] +- kvm-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch [bz#1637976] +- kvm-test-bdrv-drain-Fix-outdated-comments.patch [bz#1637976] +- kvm-block-Use-a-single-global-AioWait.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-draining-job-source-child-and-p.patch [bz#1637976] +- kvm-qemu-img-Fix-assert-when-mapping-unaligned-raw-file.patch [bz#1639374] +- kvm-iotests-Add-test-221-to-catch-qemu-img-map-regressio.patch [bz#1639374] +- Resolves: bz#1609327 + (qemu-kvm[37046]: Could not find keytab file: /etc/qemu/krb5.tab: Unknown error 49408) +- Resolves: bz#1636142 + (qemu NBD_CMD_CACHE flaws impacting non-qemu NBD clients) +- Resolves: bz#1637976 + (Crashes and hangs with iothreads vs. 
block jobs) +- Resolves: bz#1639374 + (qemu-img map 'Aborted (core dumped)' when specifying a plain file) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - +- kvm-linux-headers-update.patch [bz#1508142] +- kvm-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch [bz#1508142] +- kvm-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch [bz#1508142] +- kvm-s390x-ap-base-Adjunct-Processor-AP-object-model.patch [bz#1508142] +- kvm-s390x-vfio-ap-Introduce-VFIO-AP-device.patch [bz#1508142] +- kvm-s390-doc-detailed-specifications-for-AP-virtualizati.patch [bz#1508142] +- Resolves: bz#1508142 + ([IBM 8.0 FEAT] KVM: Guest-dedicated Crypto Adapters - qemu part) + +* Mon Oct 15 2018 Danilo Cesar Lemes de Paula - 2.12.0-38.el8 +- kvm-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch [bz#1609235] +- kvm-add-udev-kvm-check.patch [bz#1552663] +- kvm-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch [bz#1623085] +- kvm-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch [bz#1623085] +- kvm-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch [bz#1632622] +- kvm-aio-posix-compute-timeout-before-polling.patch [bz#1632622] +- kvm-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch [bz#1632622] +- kvm-intel-iommu-send-PSI-always-even-if-across-PDEs.patch [bz#1450712] +- kvm-intel-iommu-remove-IntelIOMMUNotifierNode.patch [bz#1450712] +- kvm-intel-iommu-add-iommu-lock.patch [bz#1450712] +- kvm-intel-iommu-only-do-page-walk-for-MAP-notifiers.patch [bz#1450712] +- kvm-intel-iommu-introduce-vtd_page_walk_info.patch [bz#1450712] +- kvm-intel-iommu-pass-in-address-space-when-page-walk.patch [bz#1450712] +- kvm-intel-iommu-trace-domain-id-during-page-walk.patch [bz#1450712] +- kvm-util-implement-simple-iova-tree.patch [bz#1450712] +- kvm-intel-iommu-rework-the-page-walk-logic.patch [bz#1450712] +- kvm-i386-define-the-ssbd-CPUID-feature-bit-CVE-2018-3639.patch [bz#1633928] +- Resolves: bz#1450712 + (Booting nested guest with vIOMMU, the 
assigned network devices can not receive packets (qemu)) +- Resolves: bz#1552663 + (81-kvm-rhel.rules is no longer part of initscripts) +- Resolves: bz#1609235 + (Win2016 guest can't recognize pc-dimm hotplugged to node 0) +- Resolves: bz#1623085 + (VM doesn't boot from HD) +- Resolves: bz#1632622 + (~40% virtio_blk disk performance drop for win2012r2 guest when comparing qemu-kvm-rhev-2.12.0-9 with qemu-kvm-rhev-2.12.0-12) +- Resolves: bz#1633928 + (CVE-2018-3639 qemu-kvm: hw: cpu: speculative store bypass [rhel-8.0]) + +* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - 2.12.0-37.el8 +- kvm-block-for-jobs-do-not-clear-user_paused-until-after-.patch [bz#1635583] +- kvm-iotests-Add-failure-matching-to-common.qemu.patch [bz#1635583] +- kvm-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch [bz#1635583] +- Resolves: bz#1635583 + (Quitting VM causes qemu core dump once the block mirror job paused for no enough target space) + +* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-36 +- kvm-check-Only-test-ivshm-when-it-is-compiled-in.patch [bz#1621817] +- kvm-Disable-ivshmem.patch [bz#1621817] +- kvm-mirror-Fail-gracefully-for-source-target.patch [bz#1637963] +- kvm-commit-Add-top-node-base-node-options.patch [bz#1637970] +- kvm-qemu-iotests-Test-commit-with-top-node-base-node.patch [bz#1637970] +- Resolves: bz#1621817 + (Disable IVSHMEM in RHEL 8) +- Resolves: bz#1637963 + (Segfault on 'blockdev-mirror' with same node as source and target) +- Resolves: bz#1637970 + (allow using node-names with block-commit) + +* Thu Oct 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-35.el8 +- kvm-redhat-make-the-plugins-executable.patch [bz#1638304] +- Resolves: bz#1638304 + (the driver packages lack all the library Requires) + +* Thu Oct 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-34.el8 +- kvm-seccomp-allow-sched_setscheduler-with-SCHED_IDLE-pol.patch [bz#1618356] +- kvm-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch [bz#1618356] +- 
kvm-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch [bz#1618356] +- kvm-configure-require-libseccomp-2.2.0.patch [bz#1618356] +- kvm-seccomp-set-the-seccomp-filter-to-all-threads.patch [bz#1618356] +- kvm-memory-cleanup-side-effects-of-memory_region_init_fo.patch [bz#1600365] +- Resolves: bz#1600365 + (QEMU core dumped when hotplug memory exceeding host hugepages and with discard-data=yes) +- Resolves: bz#1618356 + (qemu-kvm: Qemu: seccomp: blacklist is not applied to all threads [rhel-8]) + +* Fri Oct 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-33.el8 +- kvm-migration-postcopy-Clear-have_listen_thread.patch [bz#1608765] +- kvm-migration-cleanup-in-error-paths-in-loadvm.patch [bz#1608765] +- kvm-jobs-change-start-callback-to-run-callback.patch [bz#1632939] +- kvm-jobs-canonize-Error-object.patch [bz#1632939] +- kvm-jobs-add-exit-shim.patch [bz#1632939] +- kvm-block-commit-utilize-job_exit-shim.patch [bz#1632939] +- kvm-block-mirror-utilize-job_exit-shim.patch [bz#1632939] +- kvm-jobs-utilize-job_exit-shim.patch [bz#1632939] +- kvm-block-backup-make-function-variables-consistently-na.patch [bz#1632939] +- kvm-jobs-remove-ret-argument-to-job_completed-privatize-.patch [bz#1632939] +- kvm-jobs-remove-job_defer_to_main_loop.patch [bz#1632939] +- kvm-block-commit-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-mirror-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-stream-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-commit-refactor-commit-to-use-job-callbacks.patch [bz#1632939] +- kvm-block-mirror-don-t-install-backing-chain-on-abort.patch [bz#1632939] +- kvm-block-mirror-conservative-mirror_exit-refactor.patch [bz#1632939] +- kvm-block-stream-refactor-stream-to-use-job-callbacks.patch [bz#1632939] +- kvm-tests-blockjob-replace-Blockjob-with-Job.patch [bz#1632939] +- kvm-tests-test-blockjob-remove-exit-callback.patch [bz#1632939] +- kvm-tests-test-blockjob-txn-move-.exit-to-.clean.patch [bz#1632939] +- 
kvm-jobs-remove-.exit-callback.patch [bz#1632939] +- kvm-qapi-block-commit-expose-new-job-properties.patch [bz#1632939] +- kvm-qapi-block-mirror-expose-new-job-properties.patch [bz#1632939] +- kvm-qapi-block-stream-expose-new-job-properties.patch [bz#1632939] +- kvm-block-backup-qapi-documentation-fixup.patch [bz#1632939] +- kvm-blockdev-document-transactional-shortcomings.patch [bz#1632939] +- Resolves: bz#1608765 + (After postcopy migration, do savevm and loadvm, guest hang and call trace) +- Resolves: bz#1632939 + (qemu blockjobs other than backup do not support job-finalize or job-dismiss) + +* Fri Sep 28 2018 Danilo Cesar Lemes de Paula - 2.12.0-32.el8 +- kvm-Re-enable-disabled-Hyper-V-enlightenments.patch [bz#1625185] +- kvm-Fix-annocheck-issues.patch [bz#1624164] +- kvm-exec-check-that-alignment-is-a-power-of-two.patch [bz#1630746] +- kvm-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch [bz#1575925] +- Resolves: bz#1575925 + ("SSL: no alternative certificate subject name matches target host name" error even though sslverify = off) +- Resolves: bz#1624164 + (Review annocheck distro flag failures in qemu-kvm) +- Resolves: bz#1625185 + (Re-enable disabled Hyper-V enlightenments) +- Resolves: bz#1630746 + (qemu_ram_mmap: Assertion `is_power_of_2(align)' failed) + +* Tue Sep 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-31.el8 +- kvm-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch [bz#1619804] +- kvm-redhat-enable-opengl-add-build-and-runtime-deps.patch [bz#1618412] +- Resolves: bz#1618412 + (Enable opengl (for intel vgpu display)) +- Resolves: bz#1619804 + (kernel panic in init_amd_cacheinfo) + +* Wed Sep 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-30.el8 +- kvm-redhat-Disable-vhost-crypto.patch [bz#1625668] +- Resolves: bz#1625668 + (Decide if we should disable 'vhost-crypto' or not) + +* Wed Sep 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-29.el8 +- kvm-target-i386-sev-fix-memory-leaks.patch [bz#1615717] +- 
kvm-i386-Fix-arch_query_cpu_model_expansion-leak.patch [bz#1615717] +- kvm-redhat-Update-build-configuration.patch [bz#1573156] +- Resolves: bz#1573156 + (Update build configure for QEMU 2.12.0) +- Resolves: bz#1615717 + (Memory leaks) + +* Wed Aug 29 2018 Danilo Cesar Lemes de Paula - 2.12.0-27.el8 +- kvm-Fix-libusb-1.0.22-deprecated-libusb_set_debug-with-l.patch [bz#1622656] +- Resolves: bz#1622656 + (qemu-kvm fails to build due to libusb_set_debug being deprecated) + +* Fri Aug 17 2018 Danilo Cesar Lemes de Paula - 2.12.0-26.el8 +- kvm-redhat-remove-extra-in-rhel_rhev_conflicts-macro.patch [bz#1618752] +- Resolves: bz#1618752 + (qemu-kvm can't be installed in RHEL-8 as it Conflicts with itself.) + +* Thu Aug 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-25.el8 +- kvm-Migration-TLS-Fix-crash-due-to-double-cleanup.patch [bz#1594384] +- Resolves: bz#1594384 + (2.12 migration fixes) + +* Tue Aug 14 2018 Danilo Cesar Lemes de Paula - 2.12.0-24.el8 +- kvm-Add-qemu-keymap-to-qemu-kvm-common.patch [bz#1593117] +- Resolves: bz#1593117 + (add qemu-keymap utility) + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-23.el8 +- Fixing an issue with some old command in the spec file + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-22.el8 +- Fix an issue with the build_configure script. 
+- Resolves: bz#1425820 + (Improve QEMU packaging layout with modularization of the block layer) + + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-20.el8 +- kvm-migration-stop-compressing-page-in-migration-thread.patch [bz#1594384] +- kvm-migration-stop-compression-to-allocate-and-free-memo.patch [bz#1594384] +- kvm-migration-stop-decompression-to-allocate-and-free-me.patch [bz#1594384] +- kvm-migration-detect-compression-and-decompression-error.patch [bz#1594384] +- kvm-migration-introduce-control_save_page.patch [bz#1594384] +- kvm-migration-move-some-code-to-ram_save_host_page.patch [bz#1594384] +- kvm-migration-move-calling-control_save_page-to-the-comm.patch [bz#1594384] +- kvm-migration-move-calling-save_zero_page-to-the-common-.patch [bz#1594384] +- kvm-migration-introduce-save_normal_page.patch [bz#1594384] +- kvm-migration-remove-ram_save_compressed_page.patch [bz#1594384] +- kvm-migration-block-dirty-bitmap-fix-memory-leak-in-dirt.patch [bz#1594384] +- kvm-migration-fix-saving-normal-page-even-if-it-s-been-c.patch [bz#1594384] +- kvm-migration-update-index-field-when-delete-or-qsort-RD.patch [bz#1594384] +- kvm-migration-introduce-decompress-error-check.patch [bz#1594384] +- kvm-migration-Don-t-activate-block-devices-if-using-S.patch [bz#1594384] +- kvm-migration-not-wait-RDMA_CM_EVENT_DISCONNECTED-event-.patch [bz#1594384] +- kvm-migration-block-dirty-bitmap-fix-dirty_bitmap_load.patch [bz#1594384] +- kvm-s390x-add-RHEL-7.6-machine-type-for-ccw.patch [bz#1595718] +- kvm-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch [bz#1595718] +- kvm-linux-headers-asm-s390-kvm.h-header-sync.patch [bz#1612938] +- kvm-s390x-kvm-add-etoken-facility.patch [bz#1612938] +- Resolves: bz#1594384 + (2.12 migration fixes) +- Resolves: bz#1595718 + (Add ppa15/bpb to the default cpu model for z196 and higher in the 7.6 s390-ccw-virtio machine) +- Resolves: bz#1612938 + (Add etoken support to qemu-kvm for s390x KVM guests) + +* Fri Aug 10 2018 Danilo Cesar 
Lemes de Paula - 2.12.0-18.el8 + Mass import from RHEL 7.6 qemu-kvm-rhev, including fixes to the following BZs: + +- kvm-AArch64-Add-virt-rhel7.6-machine-type.patch [bz#1558723] +- kvm-cpus-Fix-event-order-on-resume-of-stopped-guest.patch [bz#1566153] +- kvm-qemu-img-Check-post-truncation-size.patch [bz#1523065] +- kvm-vga-catch-depth-0.patch [bz#1575541] +- kvm-Fix-x-hv-max-vps-compat-value-for-7.4-machine-type.patch [bz#1583959] +- kvm-ccid-card-passthru-fix-regression-in-realize.patch [bz#1584984] +- kvm-Use-4-MB-vram-for-cirrus.patch [bz#1542080] +- kvm-spapr_pci-Remove-unhelpful-pagesize-warning.patch [bz#1505664] +- kvm-rpm-Add-nvme-VFIO-driver-to-rw-whitelist.patch [bz#1416180] +- kvm-qobject-Use-qobject_to-instead-of-type-cast.patch [bz#1557995] +- kvm-qobject-Ensure-base-is-at-offset-0.patch [bz#1557995] +- kvm-qobject-use-a-QObjectBase_-struct.patch [bz#1557995] +- kvm-qobject-Replace-qobject_incref-QINCREF-qobject_decre.patch [bz#1557995] +- kvm-qobject-Modify-qobject_ref-to-return-obj.patch [bz#1557995] +- kvm-rbd-Drop-deprecated-drive-parameter-filename.patch [bz#1557995] +- kvm-iscsi-Drop-deprecated-drive-parameter-filename.patch [bz#1557995] +- kvm-block-Add-block-specific-QDict-header.patch [bz#1557995] +- kvm-qobject-Move-block-specific-qdict-code-to-block-qdic.patch [bz#1557995] +- kvm-block-Fix-blockdev-for-certain-non-string-scalars.patch [bz#1557995] +- kvm-block-Fix-drive-for-certain-non-string-scalars.patch [bz#1557995] +- kvm-block-Clean-up-a-misuse-of-qobject_to-in-.bdrv_co_cr.patch [bz#1557995] +- kvm-block-Factor-out-qobject_input_visitor_new_flat_conf.patch [bz#1557995] +- kvm-block-Make-remaining-uses-of-qobject-input-visitor-m.patch [bz#1557995] +- kvm-block-qdict-Simplify-qdict_flatten_qdict.patch [bz#1557995] +- kvm-block-qdict-Tweak-qdict_flatten_qdict-qdict_flatten_.patch [bz#1557995] +- kvm-block-qdict-Clean-up-qdict_crumple-a-bit.patch [bz#1557995] +- kvm-block-qdict-Simplify-qdict_is_list-some.patch [bz#1557995] +- 
kvm-check-block-qdict-Rename-qdict_flatten-s-variables-f.patch [bz#1557995] +- kvm-check-block-qdict-Cover-flattening-of-empty-lists-an.patch [bz#1557995] +- kvm-block-Fix-blockdev-blockdev-add-for-empty-objects-an.patch [bz#1557995] +- kvm-rbd-New-parameter-auth-client-required.patch [bz#1557995] +- kvm-rbd-New-parameter-key-secret.patch [bz#1557995] +- kvm-block-mirror-honor-ratelimit-again.patch [bz#1572856] +- kvm-block-mirror-Make-cancel-always-cancel-pre-READY.patch [bz#1572856] +- kvm-iotests-Add-test-for-cancelling-a-mirror-job.patch [bz#1572856] +- kvm-iotests-Split-214-off-of-122.patch [bz#1518738] +- kvm-block-Add-COR-filter-driver.patch [bz#1518738] +- kvm-block-BLK_PERM_WRITE-includes-._UNCHANGED.patch [bz#1518738] +- kvm-block-Add-BDRV_REQ_WRITE_UNCHANGED-flag.patch [bz#1518738] +- kvm-block-Set-BDRV_REQ_WRITE_UNCHANGED-for-COR-writes.patch [bz#1518738] +- kvm-block-quorum-Support-BDRV_REQ_WRITE_UNCHANGED.patch [bz#1518738] +- kvm-block-Support-BDRV_REQ_WRITE_UNCHANGED-in-filters.patch [bz#1518738] +- kvm-iotests-Clean-up-wrap-image-in-197.patch [bz#1518738] +- kvm-iotests-Copy-197-for-COR-filter-driver.patch [bz#1518738] +- kvm-iotests-Add-test-for-COR-across-nodes.patch [bz#1518738] +- kvm-qemu-io-Use-purely-string-blockdev-options.patch [bz#1576598] +- kvm-qemu-img-Use-only-string-options-in-img_open_opts.patch [bz#1576598] +- kvm-iotests-Add-test-for-U-force-share-conflicts.patch [bz#1576598] +- kvm-qemu-io-Drop-command-functions-return-values.patch [bz#1519617] +- kvm-qemu-io-Let-command-functions-return-error-code.patch [bz#1519617] +- kvm-qemu-io-Exit-with-error-when-a-command-failed.patch [bz#1519617] +- kvm-iotests.py-Add-qemu_io_silent.patch [bz#1519617] +- kvm-iotests-Let-216-make-use-of-qemu-io-s-exit-code.patch [bz#1519617] +- kvm-qcow2-Repair-OFLAG_COPIED-when-fixing-leaks.patch [bz#1527085] +- kvm-iotests-Repairing-error-during-snapshot-deletion.patch [bz#1527085] +- kvm-block-Make-bdrv_is_writable-public.patch [bz#1588039] +- 
kvm-qcow2-Do-not-mark-inactive-images-corrupt.patch [bz#1588039] +- kvm-iotests-Add-case-for-a-corrupted-inactive-image.patch [bz#1588039] +- kvm-main-loop-drop-spin_counter.patch [bz#1168213] +- kvm-target-ppc-Factor-out-the-parsing-in-kvmppc_get_cpu_.patch [bz#1560847] +- kvm-target-ppc-Don-t-require-private-l1d-cache-on-POWER8.patch [bz#1560847] +- kvm-ppc-spapr_caps-Don-t-disable-cap_cfpc-on-POWER8-by-d.patch [bz#1560847] +- kvm-qxl-fix-local-renderer-crash.patch [bz#1567733] +- kvm-qemu-img-Amendment-support-implies-create_opts.patch [bz#1537956] +- kvm-block-Add-Error-parameter-to-bdrv_amend_options.patch [bz#1537956] +- kvm-qemu-option-Pull-out-Supported-options-print.patch [bz#1537956] +- kvm-qemu-img-Add-print_amend_option_help.patch [bz#1537956] +- kvm-qemu-img-Recognize-no-creation-support-in-o-help.patch [bz#1537956] +- kvm-iotests-Test-help-option-for-unsupporting-formats.patch [bz#1537956] +- kvm-iotests-Rework-113.patch [bz#1537956] +- kvm-qemu-img-Resolve-relative-backing-paths-in-rebase.patch [bz#1569835] +- kvm-iotests-Add-test-for-rebasing-with-relative-paths.patch [bz#1569835] +- kvm-qemu-img-Special-post-backing-convert-handling.patch [bz#1527898] +- kvm-iotests-Test-post-backing-convert-target-behavior.patch [bz#1527898] +- kvm-migration-calculate-expected_downtime-with-ram_bytes.patch [bz#1564576] +- kvm-sheepdog-Fix-sd_co_create_opts-memory-leaks.patch [bz#1513543] +- kvm-qemu-iotests-reduce-chance-of-races-in-185.patch [bz#1513543] +- kvm-blockjob-do-not-cancel-timer-in-resume.patch [bz#1513543] +- kvm-nfs-Fix-error-path-in-nfs_options_qdict_to_qapi.patch [bz#1513543] +- kvm-nfs-Remove-processed-options-from-QDict.patch [bz#1513543] +- kvm-blockjob-drop-block_job_pause-resume_all.patch [bz#1513543] +- kvm-blockjob-expose-error-string-via-query.patch [bz#1513543] +- kvm-blockjob-Fix-assertion-in-block_job_finalize.patch [bz#1513543] +- kvm-blockjob-Wrappers-for-progress-counter-access.patch [bz#1513543] +- 
kvm-blockjob-Move-RateLimit-to-BlockJob.patch [bz#1513543] +- kvm-blockjob-Implement-block_job_set_speed-centrally.patch [bz#1513543] +- kvm-blockjob-Introduce-block_job_ratelimit_get_delay.patch [bz#1513543] +- kvm-blockjob-Add-block_job_driver.patch [bz#1513543] +- kvm-blockjob-Update-block-job-pause-resume-documentation.patch [bz#1513543] +- kvm-blockjob-Improve-BlockJobInfo.offset-len-documentati.patch [bz#1513543] +- kvm-job-Create-Job-JobDriver-and-job_create.patch [bz#1513543] +- kvm-job-Rename-BlockJobType-into-JobType.patch [bz#1513543] +- kvm-job-Add-JobDriver.job_type.patch [bz#1513543] +- kvm-job-Add-job_delete.patch [bz#1513543] +- kvm-job-Maintain-a-list-of-all-jobs.patch [bz#1513543] +- kvm-job-Move-state-transitions-to-Job.patch [bz#1513543] +- kvm-job-Add-reference-counting.patch [bz#1513543] +- kvm-job-Move-cancelled-to-Job.patch [bz#1513543] +- kvm-job-Add-Job.aio_context.patch [bz#1513543] +- kvm-job-Move-defer_to_main_loop-to-Job.patch [bz#1513543] +- kvm-job-Move-coroutine-and-related-code-to-Job.patch [bz#1513543] +- kvm-job-Add-job_sleep_ns.patch [bz#1513543] +- kvm-job-Move-pause-resume-functions-to-Job.patch [bz#1513543] +- kvm-job-Replace-BlockJob.completed-with-job_is_completed.patch [bz#1513543] +- kvm-job-Move-BlockJobCreateFlags-to-Job.patch [bz#1513543] +- kvm-blockjob-Split-block_job_event_pending.patch [bz#1513543] +- kvm-job-Add-job_event_.patch [bz#1513543] +- kvm-job-Move-single-job-finalisation-to-Job.patch [bz#1513543] +- kvm-job-Convert-block_job_cancel_async-to-Job.patch [bz#1513543] +- kvm-job-Add-job_drain.patch [bz#1513543] +- kvm-job-Move-.complete-callback-to-Job.patch [bz#1513543] +- kvm-job-Move-job_finish_sync-to-Job.patch [bz#1513543] +- kvm-job-Switch-transactions-to-JobTxn.patch [bz#1513543] +- kvm-job-Move-transactions-to-Job.patch [bz#1513543] +- kvm-job-Move-completion-and-cancellation-to-Job.patch [bz#1513543] +- kvm-block-Cancel-job-in-bdrv_close_all-callers.patch [bz#1513543] +- kvm-job-Add-job_yield.patch 
[bz#1513543] +- kvm-job-Add-job_dismiss.patch [bz#1513543] +- kvm-job-Add-job_is_ready.patch [bz#1513543] +- kvm-job-Add-job_transition_to_ready.patch [bz#1513543] +- kvm-job-Move-progress-fields-to-Job.patch [bz#1513543] +- kvm-job-Introduce-qapi-job.json.patch [bz#1513543] +- kvm-job-Add-JOB_STATUS_CHANGE-QMP-event.patch [bz#1513543] +- kvm-job-Add-lifecycle-QMP-commands.patch [bz#1513543] +- kvm-job-Add-query-jobs-QMP-command.patch [bz#1513543] +- kvm-blockjob-Remove-BlockJob.driver.patch [bz#1513543] +- kvm-iotests-Move-qmp_to_opts-to-VM.patch [bz#1513543] +- kvm-qemu-iotests-Test-job-with-block-jobs.patch [bz#1513543] +- kvm-vdi-Fix-vdi_co_do_create-return-value.patch [bz#1513543] +- kvm-vhdx-Fix-vhdx_co_create-return-value.patch [bz#1513543] +- kvm-job-Add-error-message-for-failing-jobs.patch [bz#1513543] +- kvm-block-create-Make-x-blockdev-create-a-job.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.get_qmp_events_filtered.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.qmp_log.patch [bz#1513543] +- kvm-qemu-iotests-Add-iotests.img_info_log.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.run_job.patch [bz#1513543] +- kvm-qemu-iotests-iotests.py-helper-for-non-file-protocol.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-206-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-207-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-210-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-211-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-212-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-213-for-blockdev-create-job.patch [bz#1513543] +- kvm-block-create-Mark-blockdev-create-stable.patch [bz#1513543] +- kvm-jobs-fix-stale-wording.patch [bz#1513543] +- kvm-jobs-fix-verb-references-in-docs.patch [bz#1513543] +- kvm-iotests-Fix-219-s-timing.patch [bz#1513543] +- kvm-iotests-improve-pause_job.patch [bz#1513543] +- kvm-rpm-Whitelist-copy-on-read-block-driver.patch 
[bz#1518738] +- kvm-rpm-add-throttle-driver-to-rw-whitelist.patch [bz#1591076] +- kvm-usb-host-skip-open-on-pending-postload-bh.patch [bz#1572851] +- kvm-i386-Define-the-Virt-SSBD-MSR-and-handling-of-it-CVE.patch [bz#1574216] +- kvm-i386-define-the-AMD-virt-ssbd-CPUID-feature-bit-CVE-.patch [bz#1574216] +- kvm-block-file-posix-Pass-FD-to-locking-helpers.patch [bz#1519144] +- kvm-block-file-posix-File-locking-during-creation.patch [bz#1519144] +- kvm-iotests-Add-creation-test-to-153.patch [bz#1519144] +- kvm-vhost-user-add-Net-prefix-to-internal-state-structur.patch [bz#1526645] +- kvm-virtio-support-setting-memory-region-based-host-noti.patch [bz#1526645] +- kvm-vhost-user-support-receiving-file-descriptors-in-sla.patch [bz#1526645] +- kvm-osdep-add-wait.h-compat-macros.patch [bz#1526645] +- kvm-vhost-user-bridge-support-host-notifier.patch [bz#1526645] +- kvm-vhost-allow-backends-to-filter-memory-sections.patch [bz#1526645] +- kvm-vhost-user-allow-slave-to-send-fds-via-slave-channel.patch [bz#1526645] +- kvm-vhost-user-introduce-shared-vhost-user-state.patch [bz#1526645] +- kvm-vhost-user-support-registering-external-host-notifie.patch [bz#1526645] +- kvm-libvhost-user-support-host-notifier.patch [bz#1526645] +- kvm-block-Introduce-API-for-copy-offloading.patch [bz#1482537] +- kvm-raw-Check-byte-range-uniformly.patch [bz#1482537] +- kvm-raw-Implement-copy-offloading.patch [bz#1482537] +- kvm-qcow2-Implement-copy-offloading.patch [bz#1482537] +- kvm-file-posix-Implement-bdrv_co_copy_range.patch [bz#1482537] +- kvm-iscsi-Query-and-save-device-designator-when-opening.patch [bz#1482537] +- kvm-iscsi-Create-and-use-iscsi_co_wait_for_task.patch [bz#1482537] +- kvm-iscsi-Implement-copy-offloading.patch [bz#1482537] +- kvm-block-backend-Add-blk_co_copy_range.patch [bz#1482537] +- kvm-qemu-img-Convert-with-copy-offloading.patch [bz#1482537] +- kvm-qcow2-Fix-src_offset-in-copy-offloading.patch [bz#1482537] +- kvm-iscsi-Don-t-blindly-use-designator-length-in-respons.patch 
[bz#1482537] +- kvm-file-posix-Fix-EINTR-handling.patch [bz#1482537] +- kvm-usb-storage-Add-rerror-werror-properties.patch [bz#1595180] +- kvm-numa-clarify-error-message-when-node-index-is-out-of.patch [bz#1578381] +- kvm-qemu-iotests-Update-026.out.nocache-reference-output.patch [bz#1528541] +- kvm-qcow2-Free-allocated-clusters-on-write-error.patch [bz#1528541] +- kvm-qemu-iotests-Test-qcow2-not-leaking-clusters-on-writ.patch [bz#1528541] +- kvm-qemu-options-Add-missing-newline-to-accel-help-text.patch [bz#1586313] +- kvm-xhci-fix-guest-triggerable-assert.patch [bz#1594135] +- kvm-virtio-gpu-tweak-scanout-disable.patch [bz#1589634] +- kvm-virtio-gpu-update-old-resource-too.patch [bz#1589634] +- kvm-virtio-gpu-disable-scanout-when-backing-resource-is-.patch [bz#1589634] +- kvm-block-Don-t-silently-truncate-node-names.patch [bz#1549654] +- kvm-pr-helper-fix-socket-path-default-in-help.patch [bz#1533158] +- kvm-pr-helper-fix-assertion-failure-on-failed-multipath-.patch [bz#1533158] +- kvm-pr-manager-helper-avoid-SIGSEGV-when-writing-to-the-.patch [bz#1533158] +- kvm-pr-manager-put-stubs-in-.c-file.patch [bz#1533158] +- kvm-pr-manager-add-query-pr-managers-QMP-command.patch [bz#1533158] +- kvm-pr-manager-helper-report-event-on-connection-disconn.patch [bz#1533158] +- kvm-pr-helper-avoid-error-on-PR-IN-command-with-zero-req.patch [bz#1533158] +- kvm-pr-helper-Rework-socket-path-handling.patch [bz#1533158] +- kvm-pr-manager-helper-fix-memory-leak-on-event.patch [bz#1533158] +- kvm-object-fix-OBJ_PROP_LINK_UNREF_ON_RELEASE-ambivalenc.patch [bz#1556678] +- kvm-usb-hcd-xhci-test-add-a-test-for-ccid-hotplug.patch [bz#1556678] +- kvm-Revert-usb-release-the-created-buses.patch [bz#1556678] +- kvm-file-posix-Fix-creation-locking.patch [bz#1599335] +- kvm-file-posix-Unlock-FD-after-creation.patch [bz#1599335] +- kvm-ahci-trim-signatures-on-raise-lower.patch [bz#1584914] +- kvm-ahci-fix-PxCI-register-race.patch [bz#1584914] +- kvm-ahci-don-t-schedule-unnecessary-BH.patch 
[bz#1584914] +- kvm-qcow2-Fix-qcow2_truncate-error-return-value.patch [bz#1595173] +- kvm-block-Convert-.bdrv_truncate-callback-to-coroutine_f.patch [bz#1595173] +- kvm-qcow2-Remove-coroutine-trampoline-for-preallocate_co.patch [bz#1595173] +- kvm-block-Move-bdrv_truncate-implementation-to-io.c.patch [bz#1595173] +- kvm-block-Use-tracked-request-for-truncate.patch [bz#1595173] +- kvm-file-posix-Make-.bdrv_co_truncate-asynchronous.patch [bz#1595173] +- kvm-block-Fix-copy-on-read-crash-with-partial-final-clus.patch [bz#1590640] +- kvm-block-fix-QEMU-crash-with-scsi-hd-and-drive_del.patch [bz#1599515] +- kvm-virtio-rng-process-pending-requests-on-DRIVER_OK.patch [bz#1576743] +- kvm-file-posix-specify-expected-filetypes.patch [bz#1525829] +- kvm-iotests-add-test-226-for-file-driver-types.patch [bz#1525829] +- kvm-block-dirty-bitmap-add-lock-to-bdrv_enable-disable_d.patch [bz#1207657] +- kvm-qapi-add-x-block-dirty-bitmap-enable-disable.patch [bz#1207657] +- kvm-qmp-transaction-support-for-x-block-dirty-bitmap-ena.patch [bz#1207657] +- kvm-qapi-add-x-block-dirty-bitmap-merge.patch [bz#1207657] +- kvm-qapi-add-disabled-parameter-to-block-dirty-bitmap-ad.patch [bz#1207657] +- kvm-block-dirty-bitmap-add-bdrv_enable_dirty_bitmap_lock.patch [bz#1207657] +- kvm-dirty-bitmap-fix-double-lock-on-bitmap-enabling.patch [bz#1207657] +- kvm-block-qcow2-bitmap-fix-free_bitmap_clusters.patch [bz#1207657] +- kvm-qcow2-add-overlap-check-for-bitmap-directory.patch [bz#1207657] +- kvm-blockdev-enable-non-root-nodes-for-backup-source.patch [bz#1207657] +- kvm-iotests-add-222-to-test-basic-fleecing.patch [bz#1207657] +- kvm-qcow2-Remove-dead-check-on-ret.patch [bz#1207657] +- kvm-block-Move-request-tracking-to-children-in-copy-offl.patch [bz#1207657] +- kvm-block-Fix-parameter-checking-in-bdrv_co_copy_range_i.patch [bz#1207657] +- kvm-block-Honour-BDRV_REQ_NO_SERIALISING-in-copy-range.patch [bz#1207657] +- kvm-backup-Use-copy-offloading.patch [bz#1207657] +- 
kvm-block-backup-disable-copy-offloading-for-backup.patch [bz#1207657] +- kvm-iotests-222-Don-t-run-with-luks.patch [bz#1207657] +- kvm-block-io-fix-copy_range.patch [bz#1207657] +- kvm-block-split-flags-in-copy_range.patch [bz#1207657] +- kvm-block-add-BDRV_REQ_SERIALISING-flag.patch [bz#1207657] +- kvm-block-backup-fix-fleecing-scheme-use-serialized-writ.patch [bz#1207657] +- kvm-nbd-server-Reject-0-length-block-status-request.patch [bz#1207657] +- kvm-nbd-server-fix-trace.patch [bz#1207657] +- kvm-nbd-server-refactor-NBDExportMetaContexts.patch [bz#1207657] +- kvm-nbd-server-add-nbd_meta_empty_or_pattern-helper.patch [bz#1207657] +- kvm-nbd-server-implement-dirty-bitmap-export.patch [bz#1207657] +- kvm-qapi-new-qmp-command-nbd-server-add-bitmap.patch [bz#1207657] +- kvm-docs-interop-add-nbd.txt.patch [bz#1207657] +- kvm-nbd-server-introduce-NBD_CMD_CACHE.patch [bz#1207657] +- kvm-nbd-server-Silence-gcc-false-positive.patch [bz#1207657] +- kvm-nbd-server-Fix-dirty-bitmap-logic-regression.patch [bz#1207657] +- kvm-nbd-server-fix-nbd_co_send_block_status.patch [bz#1207657] +- kvm-nbd-client-Add-x-dirty-bitmap-to-query-bitmap-from-s.patch [bz#1207657] +- kvm-iotests-New-test-223-for-exporting-dirty-bitmap-over.patch [bz#1207657] +- kvm-hw-char-serial-Only-retry-if-qemu_chr_fe_write-retur.patch [bz#1592817] +- kvm-hw-char-serial-retry-write-if-EAGAIN.patch [bz#1592817] +- kvm-throttle-groups-fix-hang-when-group-member-leaves.patch [bz#1535914] +- kvm-Disable-aarch64-devices-reappeared-after-2.12-rebase.patch [bz#1586357] +- kvm-Disable-split-irq-device.patch [bz#1586357] +- kvm-Disable-AT24Cx-i2c-eeprom.patch [bz#1586357] +- kvm-Disable-CAN-bus-devices.patch [bz#1586357] +- kvm-Disable-new-superio-devices.patch [bz#1586357] +- kvm-Disable-new-pvrdma-device.patch [bz#1586357] +- kvm-qdev-add-HotplugHandler-post_plug-callback.patch [bz#1607891] +- kvm-virtio-scsi-fix-hotplug-reset-vs-event-race.patch [bz#1607891] +- kvm-e1000-Fix-tso_props-compat-for-82540em.patch 
[bz#1608778] +- kvm-slirp-correct-size-computation-while-concatenating-m.patch [bz#1586255] +- kvm-s390x-sclp-fix-maxram-calculation.patch [bz#1595740] +- kvm-redhat-Make-gitpublish-profile-the-default-one.patch [bz#1425820] +- Resolves: bz#1168213 + (main-loop: WARNING: I/O thread spun for 1000 iterations while doing stream block device.) +- Resolves: bz#1207657 + (RFE: QEMU Incremental live backup - push and pull modes) +- Resolves: bz#1416180 + (QEMU VFIO based block driver for NVMe devices) +- Resolves: bz#1425820 + (Improve QEMU packaging layout with modularization of the block layer) +- Resolves: bz#1482537 + ([RFE] qemu-img copy-offloading (convert command)) +- Resolves: bz#1505664 + ("qemu-kvm: System page size 0x1000000 is not enabled in page_size_mask (0x11000). Performance may be slow" show up while using hugepage as guest's memory) +- Resolves: bz#1513543 + ([RFE] Add block job to create format on a storage device) +- Resolves: bz#1518738 + (Add 'copy-on-read' filter driver for use with blockdev-add) +- Resolves: bz#1519144 + (qemu-img: image locking doesn't cover image creation) +- Resolves: bz#1519617 + (The exit code should be non-zero when qemu-io reports an error) +- Resolves: bz#1523065 + ("qemu-img resize" should fail to decrease the size of logical partition/lvm/iSCSI image with raw format) +- Resolves: bz#1525829 + (can not boot up a scsi-block passthrough disk via -blockdev with error "cannot get SG_IO version number: Operation not supported. 
Is this a SCSI device?") +- Resolves: bz#1526645 + ([Intel 7.6 FEAT] vHost Data Plane Acceleration (vDPA) - vhost user client - qemu-kvm-rhev) +- Resolves: bz#1527085 + (The copied flag should be updated during '-r leaks') +- Resolves: bz#1527898 + ([RFE] qemu-img should leave cluster unallocated if it's read as zero throughout the backing chain) +- Resolves: bz#1528541 + (qemu-img check reports tons of leaked clusters after re-start nfs service to resume writing data in guest) +- Resolves: bz#1533158 + (QEMU support for libvirtd restarting qemu-pr-helper) +- Resolves: bz#1535914 + (Disable io throttling for one member disk of a group during io will induce the other one hang with io) +- Resolves: bz#1537956 + (RFE: qemu-img amend should list the true supported options) +- Resolves: bz#1542080 + (Qemu core dump at cirrus_invalidate_region) +- Resolves: bz#1549654 + (Reject node-names which would be truncated by the block layer commands) +- Resolves: bz#1556678 + (Hot plug usb-ccid for the 2nd time with the same ID as the 1st time failed) +- Resolves: bz#1557995 + (QAPI schema for RBD storage misses the 'password-secret' option) +- Resolves: bz#1558723 + (Create RHEL-7.6 QEMU machine type for AArch64) +- Resolves: bz#1560847 + ([Power8][FW b0320a_1812.861][rhel7.5rc2 3.10.0-861.el7.ppc64le][qemu-kvm-{ma,rhev}-2.10.0-21.el7_5.1.ppc64le] KVM guest does not default to ori type flush even with pseries-rhel7.5.0-sxxm) +- Resolves: bz#1564576 + (Pegas 1.1 - Require to backport qemu-kvm patch that fixes expected_downtime calculation during migration) +- Resolves: bz#1566153 + (IOERROR pause code lost after resuming a VM while I/O error is still present) +- Resolves: bz#1567733 + (qemu abort when migrate during guest reboot) +- Resolves: bz#1569835 + (qemu-img get wrong backing file path after rebasing image with relative path) +- Resolves: bz#1572851 + (Core dumped after migration when with usb-host) +- Resolves: bz#1572856 + ('block-job-cancel' can not cancel a 
"drive-mirror" job) +- Resolves: bz#1574216 + (CVE-2018-3639 qemu-kvm-rhev: hw: cpu: speculative store bypass [rhel-7.6]) +- Resolves: bz#1575541 + (qemu core dump while installing win10 guest) +- Resolves: bz#1576598 + (Segfault in qemu-io and qemu-img with -U --image-opts force-share=off) +- Resolves: bz#1576743 + (virtio-rng hangs when running on recent (2.x) QEMU versions) +- Resolves: bz#1578381 + (Error message need update when specify numa distance with node index >=128) +- Resolves: bz#1583959 + (Incorrect vcpu count limit for 7.4 machine types for windows guests) +- Resolves: bz#1584914 + (SATA emulator lags and hangs) +- Resolves: bz#1584984 + (Vm starts failed with 'passthrough' smartcard) +- Resolves: bz#1586255 + (CVE-2018-11806 qemu-kvm-rhev: QEMU: slirp: heap buffer overflow while reassembling fragmented datagrams [rhel-7.6]) +- Resolves: bz#1586313 + (-smp option is not easily found in the output of qemu help) +- Resolves: bz#1586357 + (Disable new devices in 2.12) +- Resolves: bz#1588039 + (Possible assertion failure in qemu when a corrupted image is used during an incoming migration) +- Resolves: bz#1589634 + (Migration failed when rebooting guest with multiple virtio videos) +- Resolves: bz#1590640 + (qemu-kvm: block/io.c:1098: bdrv_co_do_copy_on_readv: Assertion `skip_bytes < pnum' failed.) 
+- Resolves: bz#1591076 + (The driver of 'throttle' is not whitelisted) +- Resolves: bz#1592817 + (Retrying on serial_xmit if the pipe is broken may compromise the Guest) +- Resolves: bz#1594135 + (system_reset many times linux guests cause qemu process Aborted) +- Resolves: bz#1595173 + (blockdev-create is blocking) +- Resolves: bz#1595180 + (Can't set rerror/werror with usb-storage) +- Resolves: bz#1595740 + (RHEL-Alt-7.6 - qemu has error during migration of larger guests) +- Resolves: bz#1599335 + (Image creation locking is too tight and is not properly released) +- Resolves: bz#1599515 + (qemu core-dump with aio_read via hmp (util/qemu-thread-posix.c:64: qemu_mutex_lock_impl: Assertion `mutex->initialized' failed)) +- Resolves: bz#1607891 + (Hotplug events are sometimes lost with virtio-scsi + iothread) +- Resolves: bz#1608778 + (qemu/migration: migrate failed from RHEL.7.6 to RHEL.7.5 with e1000-82540em) + +* Mon Aug 06 2018 Danilo Cesar Lemes de Paula - 2.12.0-17.el8 +- kvm-linux-headers-Update-to-include-KVM_CAP_S390_HPAGE_1.patch [bz#1610906] +- kvm-s390x-Enable-KVM-huge-page-backing-support.patch [bz#1610906] +- kvm-redhat-s390x-add-hpage-1-to-kvm.conf.patch [bz#1610906] +- Resolves: bz#1610906 + ([IBM 8.0 FEAT] KVM: Huge Pages - libhugetlbfs Enablement - qemu-kvm part) + +* Tue Jul 31 2018 Danilo Cesar Lemes de Paula - 2.12.0-16.el8 +- kvm-spapr-Correct-inverted-test-in-spapr_pc_dimm_node.patch [bz#1601671] +- kvm-osdep-powerpc64-align-memory-to-allow-2MB-radix-THP-.patch [bz#1601317] +- kvm-RHEL-8.0-Add-pseries-rhel7.6.0-sxxm-machine-type.patch [bz#1595501] +- kvm-i386-Helpers-to-encode-cache-information-consistentl.patch [bz#1597739] +- kvm-i386-Add-cache-information-in-X86CPUDefinition.patch [bz#1597739] +- kvm-i386-Initialize-cache-information-for-EPYC-family-pr.patch [bz#1597739] +- kvm-i386-Add-new-property-to-control-cache-info.patch [bz#1597739] +- kvm-i386-Clean-up-cache-CPUID-code.patch [bz#1597739] +- 
kvm-i386-Populate-AMD-Processor-Cache-Information-for-cp.patch [bz#1597739] +- kvm-i386-Add-support-for-CPUID_8000_001E-for-AMD.patch [bz#1597739] +- kvm-i386-Fix-up-the-Node-id-for-CPUID_8000_001E.patch [bz#1597739] +- kvm-i386-Enable-TOPOEXT-feature-on-AMD-EPYC-CPU.patch [bz#1597739] +- kvm-i386-Remove-generic-SMT-thread-check.patch [bz#1597739] +- kvm-i386-Allow-TOPOEXT-to-be-enabled-on-older-kernels.patch [bz#1597739] +- Resolves: bz#1595501 + (Create pseries-rhel7.6.0-sxxm machine type) +- Resolves: bz#1597739 + (AMD EPYC/Zen SMT support for KVM / QEMU guest (qemu-kvm)) +- Resolves: bz#1601317 + (RHEL8.0 - qemu patch to align memory to allow 2MB THP) +- Resolves: bz#1601671 + (After rebooting guest,all the hot plug memory will be assigned to the 1st numa node.) + +* Tue Jul 24 2018 Danilo Cesar Lemes de Paula - 2.12.0-15.el8 +- kvm-spapr-Add-ibm-max-associativity-domains-property.patch [bz#1599593] +- kvm-Revert-spapr-Don-t-allow-memory-hotplug-to-memory-le.patch [bz#1599593] +- kvm-simpletrace-Convert-name-from-mapping-record-to-str.patch [bz#1594969] +- kvm-tests-fix-TLS-handshake-failure-with-TLS-1.3.patch [bz#1602403] +- Resolves: bz#1594969 + (simpletrace.py fails when running with Python 3) +- Resolves: bz#1599593 + (User can't hotplug memory to less memory numa node on rhel8) +- Resolves: bz#1602403 + (test-crypto-tlssession unit test fails with assertions) + +* Mon Jul 09 2018 Danilo Cesar Lemes de Paula - 2.12.0-14.el8 +- kvm-vfio-pci-Default-display-option-to-off.patch [bz#1590511] +- kvm-python-futurize-f-libfuturize.fixes.fix_print_with_i.patch [bz#1571533] +- kvm-python-futurize-f-lib2to3.fixes.fix_except.patch [bz#1571533] +- kvm-Revert-Defining-a-shebang-for-python-scripts.patch [bz#1571533] +- kvm-spec-Fix-ambiguous-python-interpreter-name.patch [bz#1571533] +- kvm-qemu-ga-blacklisting-guest-exec-and-guest-exec-statu.patch [bz#1518132] +- kvm-redhat-rewrap-build_configure.sh-cmdline-for-the-rh-.patch [] +- 
kvm-redhat-remove-the-VTD-LIVE_BLOCK_OPS-and-RHV-options.patch [] +- kvm-redhat-fix-the-rh-env-prep-target-s-dependency-on-th.patch [] +- kvm-redhat-remove-dead-code-related-to-s390-not-s390x.patch [] +- kvm-redhat-sync-compiler-flags-from-the-spec-file-to-rh-.patch [] +- kvm-redhat-sync-guest-agent-enablement-and-tcmalloc-usag.patch [] +- kvm-redhat-fix-up-Python-3-dependency-for-building-QEMU.patch [] +- kvm-redhat-fix-up-Python-dependency-for-SRPM-generation.patch [] +- kvm-redhat-disable-glusterfs-dependency-support-temporar.patch [] +- Resolves: bz#1518132 + (Ensure file access RPCs are disabled by default) +- Resolves: bz#1571533 + (Convert qemu-kvm python scripts to python3) +- Resolves: bz#1590511 + (Fails to start guest with Intel vGPU device) + +* Thu Jun 21 2018 Danilo C. L. de Paula - 2.12.0-13.el8 +- Resolves: bz#1508137 + ([IBM 8.0 FEAT] KVM: Interactive Bootloader (qemu)) +- Resolves: bz#1513558 + (Remove RHEL6 machine types) +- Resolves: bz#1568600 + (pc-i440fx-rhel7.6.0 and pc-q35-rhel7.6.0 machine types (x86)) +- Resolves: bz#1570029 + ([IBM 8.0 FEAT] KVM: 3270 Connectivity - qemu part) +- Resolves: bz#1578855 + (Enable Native Ceph support on non x86_64 CPUs) +- Resolves: bz#1585651 + (RHEL 7.6 new pseries machine type (ppc64le)) +- Resolves: bz#1592337 + ([IBM 8.0 FEAT] KVM: CPU Model z14 ZR1 (qemu-kvm)) + +* Tue May 15 2018 Danilo C. L. de Paula - 2.12.0-11.el8.1 +- Resolves: bz#1576468 + (Enable vhost_user in qemu-kvm 2.12) + +* Wed May 09 2018 Danilo de Paula - 2.12.0-11.el8 +- Resolves: bz#1574406 + ([RHEL 8][qemu-kvm] Failed to find romfile "efi-virtio.rom") +- Resolves: bz#1569675 + (Backwards compatibility of pc-*-rhel7.5.0 and older machine-types) +- Resolves: bz#1576045 + (Fix build issue by using python3) +- Resolves: bz#1571145 + (qemu-kvm segfaults on RHEL 8 when run guestfsd under TCG) + +* Fri Apr 20 2018 Danilo de Paula - 2.12.0-10.el +- Fixing some issues with packaging. 
+- Rebasing to 2.12.0-rc4 + +* Fri Apr 13 2018 Danilo de Paula - 2.11.0-7.el8 +- Bumping epoch for RHEL8 and dropping self-obsoleting + +* Thu Apr 12 2018 Danilo de Paula - 2.11.0-6.el8 +- Rebuilding + +* Mon Mar 05 2018 Danilo de Paula - 2.11.0-5.el8 +- Prepare building on RHEL-8.0 diff --git a/qemu-pr-helper.service b/qemu-pr-helper.service new file mode 100644 index 0000000..a1d27b0 --- /dev/null +++ b/qemu-pr-helper.service @@ -0,0 +1,15 @@ +[Unit] +Description=Persistent Reservation Daemon for QEMU + +[Service] +WorkingDirectory=/tmp +Type=simple +ExecStart=/usr/bin/qemu-pr-helper +PrivateTmp=yes +ProtectSystem=strict +ReadWritePaths=/var/run +RestrictAddressFamilies=AF_UNIX +Restart=always +RestartSec=0 + +[Install] diff --git a/qemu-pr-helper.socket b/qemu-pr-helper.socket new file mode 100644 index 0000000..9d7c3e5 --- /dev/null +++ b/qemu-pr-helper.socket @@ -0,0 +1,9 @@ +[Unit] +Description=Persistent Reservation Daemon for QEMU + +[Socket] +ListenStream=/run/qemu-pr-helper.sock +SocketMode=0600 + +[Install] +WantedBy=multi-user.target diff --git a/rpminspect.yaml b/rpminspect.yaml new file mode 100644 index 0000000..889796d --- /dev/null +++ b/rpminspect.yaml @@ -0,0 +1,11 @@ +--- +elf: + exclude_path: (.*s390-ccw.img.*)|(.*s390-netboot.img.*) +inspections: + badfuncs: off +annocheck: + - hardened: --skip-cf-protection --skip-property-note --ignore-unknown --verbose + - rhel-policy: --skip-cf-protection --skip-property-note --ignore-unknown --verbose + ignore: + - /usr/share/qemu-kvm/s390-ccw.img + - /usr/share/qemu-kvm/s390-netboot.img diff --git a/sources b/sources new file mode 100644 index 0000000..b6290ac --- /dev/null +++ b/sources @@ -0,0 +1 @@ +SHA1 (qemu-6.2.0.tar.xz) = 68cd61a466170115b88817e2d52db2cd7a92f43a diff --git a/tests_data_acpi_pc_SSDT.dimmpxm b/tests_data_acpi_pc_SSDT.dimmpxm new file mode 100644 index 0000000..ac55387 Binary files /dev/null and b/tests_data_acpi_pc_SSDT.dimmpxm differ diff --git a/tests_data_acpi_q35_FACP.slic 
/*
 * Patch-dump residue that shared these source lines with udev-kvm-check.c
 * (tail of the preceding binary-file diffs plus this file's diff header),
 * preserved here so that no content is dropped:
 *
 *   b/tests_data_acpi_q35_FACP.slic
 *   new file mode 100644
 *   index 0000000..15986e0
 *   Binary files /dev/null and b/tests_data_acpi_q35_FACP.slic differ
 *   diff --git a/tests_data_acpi_q35_SSDT.dimmpxm b/tests_data_acpi_q35_SSDT.dimmpxm
 *   new file mode 100644
 *   index 0000000..98e6f0e
 *   Binary files /dev/null and b/tests_data_acpi_q35_SSDT.dimmpxm differ
 *   diff --git a/tests_data_acpi_virt_SSDT.memhp b/tests_data_acpi_virt_SSDT.memhp
 *   new file mode 100644
 *   index 0000000..375d7b6
 *   Binary files /dev/null and b/tests_data_acpi_virt_SSDT.memhp differ
 *   diff --git a/udev-kvm-check.c b/udev-kvm-check.c
 *   new file mode 100644
 *   index 0000000..928b9de
 *   --- /dev/null
 *   +++ b/udev-kvm-check.c
 *   @@ -0,0 +1,155 @@
 */

/*
 * udev-kvm-check.c
 *
 * Copyright 2018 Red Hat, Inc.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>

/* Threshold reported when the configuration yields no usable value. */
#define DEFAULT 0
/* Identity string passed to openlog(). */
#define FACILITY "kvm"
/* Configuration file that may carry a THRESHOLD=<n> line. */
#define SYSCONFIG_KVM "/etc/sysconfig/kvm"

#define COUNT_MSG \
    "%d %s now active"

/*
 * Discard input up to and including the end of the current line.
 *
 * ch is the character most recently read from fp.  If it already is a
 * newline (or EOF) nothing more is consumed, which keeps the caller from
 * accidentally swallowing the following line.
 *
 * Returns '\n' when a line end was reached, EOF when the input ran out.
 */
static int skip_line(FILE *fp, int ch)
{
    while (ch != '\n' && ch != EOF) {
        ch = getc(fp);
    }
    return ch;
}

/*
 * Scan fp for the first well-formed threshold setting and return its value.
 *
 * A setting is recognised when, after optional leading whitespace (blank
 * lines included), a line starts with the exact text "THRESHOLD=" followed
 * immediately by one or more decimal digits, and the digits are followed by
 * '#', whitespace, a newline or end of input.  Lines that do not match are
 * skipped and scanning continues with the next line.
 *
 * Returns the parsed value, or DEFAULT when no line matches.  A number too
 * large for int is treated like any other malformed line instead of being
 * converted (the conversion result would otherwise be undefined).
 */
int get_threshold_from_file(FILE *fp)
{
    static const char key[] = "THRESHOLD=";

    for (;;) {
        size_t pos = 0;
        int ch;

        /* Skip blank lines and indentation in front of a candidate key. */
        do {
            ch = getc(fp);
        } while (ch != EOF && isspace(ch));

        /* Match as much of the key as possible; ch ends up holding the
         * first character that is not part of the match. */
        while (key[pos] != '\0' && ch == key[pos]) {
            pos++;
            ch = getc(fp);
        }

        if (key[pos] == '\0' && isdigit(ch)) {
            int value = 0;
            int overflow = 0;

            do {
                int digit = ch - '0';

                /* Check before multiplying: signed overflow is undefined. */
                if (value > (INT_MAX - digit) / 10) {
                    overflow = 1;
                } else {
                    value = value * 10 + digit;
                }
                ch = getc(fp);
            } while (isdigit(ch));

            if (!overflow && (ch == '#' || ch == EOF || isspace(ch))) {
                return value;
            }
        }

        if (ch == EOF) {
            return DEFAULT;
        }

        /* Not a valid setting.  ch may itself be the newline that ended the
         * line (e.g. "THRESHOLD=" with nothing after it); skip_line() stops
         * right there so the next line is still examined. */
        if (skip_line(fp, ch) == EOF) {
            return DEFAULT;
        }
    }
}

/*
 * Read the threshold from SYSCONFIG_KVM.
 *
 * Returns DEFAULT when the file cannot be opened or holds no valid
 * THRESHOLD= line.
 */
int get_threshold(void)
{
    FILE *fp = fopen(SYSCONFIG_KVM, "r");
    int val;

    if (!fp) {
        return DEFAULT;
    }

    val = get_threshold_from_file(fp);
    fclose(fp);
    return val;
}

/* Return the noun matching count: singular for exactly 1, plural otherwise. */
const char *guest(int count)
{
    return (count == 1 ? "guest" : "guests");
}

/* Log "<count> guest(s) now active" at LOG_INFO under the FACILITY ident. */
void emit_count_message(int count)
{
    openlog(FACILITY, LOG_CONS, LOG_USER);
    syslog(LOG_INFO, COUNT_MSG, count, guest(count));
    closelog();
}

/*
 * Convert text to an int.
 *
 * Returns 0 and stores the value in *out on success; returns -1 when text
 * is empty, is not entirely a decimal number, or is out of range for int.
 */
static int parse_count(const char *text, int *out)
{
    char *end;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0') {
        return -1;
    }
    if (errno == ERANGE || value < INT_MIN || value > INT_MAX) {
        return -1;
    }

    *out = (int)value;
    return 0;
}

/*
 * Usage: udev-kvm-check <count> <event>
 *
 * For the event "create" a message is logged when no threshold is
 * configured (threshold == 0) or count exceeds it; for any other event a
 * message is logged when count is at least the threshold.
 *
 * Exits with status 1 when arguments are missing or <count> is not a valid
 * integer, 0 otherwise.
 */
int main(int argc, char **argv)
{
    int count, threshold;

    if (argc < 3) {
        exit(1);
    }

    if (parse_count(argv[1], &count) != 0) {
        exit(1);
    }
    threshold = get_threshold();

    if (!strcmp(argv[2], "create")) {
        if (threshold == 0 || count > threshold) {
            emit_count_message(count);
        }
    } else {
        if (count >= threshold) {
            emit_count_message(count);
        }
    }

    return 0;
}

/*
 * Patch-dump residue that followed udev-kvm-check.c on the same source
 * line, preserved so that no content is dropped:
 *
 *   diff --git a/vhost.conf b/vhost.conf
 *   new file mode 100644
 *   index 0000000..68d6d7f
 *   --- /dev/null
 *   +++ b/vhost.conf
 *   @@ -0,0 +1,3 @@
 *   +# Increase default vhost memory map limit to match
 *   +# KVM's memory slot limit
 *   +options vhost max_mem_regions=509
 */